diff --git "a/profile_trace/iteration_14848/rank0_trace.json" "b/profile_trace/iteration_14848/rank0_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_14848/rank0_trace.json" @@ -0,0 +1,68774 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 0, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "4E9F7886AFE44F13BCE36C53245A0DA2", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367618430.551, "dur": 137.835, + "args": { + "External id": 293377,"Record function id": 0, "Sequence number": 1209231, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367618470.301, "dur": 88.839, + "args": { + "External id": 293378,"Sequence number": 1209231, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2070547, "tid": 2107622, "ts": 5333367618470.301, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070547, "tid": 2107622, + "ts": 5333367618478.567, "dur": 78.690, + "args": { + "External id": 293379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367618582.160, "dur": 209.525, + "args": { + "External id": 293380,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367618677.325, "dur": 98.292, + "args": { + "External id": 293381,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.31", "pid": 2070547, "tid": 2107622, + "ts": 5333367618710.300, "dur": 53.117, + "args": { + "External id": 293382,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367618781.050, "dur": 2.250, + "args": { + "External id": 293383,"Sequence number": 1209230, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2070547, "tid": 2107622, "ts": 5333367618781.050, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearListNetFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367618801.610, "dur": 77483.338, + "args": { + "External id": 293384,"Record function id": 0, "Sequence number": 1209229, "Fwd thread id": 1, "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367618807.433, "dur": 77415.898, + "args": { + "External id": 293385,"Sequence number": 1209229, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8 + } + }, + { + "ph": "f", "id": 3, "pid": 2070547, "tid": 2107622, "ts": 5333367618807.433, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367618854.010, "dur": 4.631, + "args": { + "External id": 293386,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367618862.172, "dur": 77221.565, + "args": { + "External id": 293387,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367618864.310, "dur": 77218.891, + "args": { + "External id": 293388,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367618867.232, "dur": 9.395, + "args": { + "External id": 293389,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367618880.533, "dur": 77200.650, + "args": { + "External id": 293390,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070547, "tid": 2107622, + "ts": 5333367696090.067, "dur": 0.587, + "args": { + "External id": 293391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070547, "tid": 2107622, + "ts": 5333367696093.570, "dur": 3.197, + "args": { + "External id": 293392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070547, "tid": 2107622, + "ts": 5333367696095.146, "dur": 1.486, + "args": { + "External id": 293393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070547, "tid": 2107622, + "ts": 5333367696104.019, "dur": 39.669, + "args": { + "External id": 293394,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070547, "tid": 2107622, + "ts": 5333367696154.543, "dur": 61.476, + "args": { + "External id": 293395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070547, "tid": 2107622, + "ts": 5333367696156.223, "dur": 59.556, + "args": { + "External id": 293396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070547, "tid": 2107622, + "ts": 5333367696158.143, "dur": 56.902, + "args": { + "External id": 293397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367696235.735, "dur": 38.762, + "args": { + "External id": 293398,"Record function id": 0, "Concrete Inputs": ["", "15", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], []], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367696237.231, "dur": 37.051, + "args": { + "External id": 293399,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367696244.289, "dur": 6.801, + "args": { + "External id": 293400,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367696252.436, "dur": 21.337, + "args": { + "External id": 293401,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367696300.701, "dur": 12.782, + "args": { + "External id": 293402,"Record function id": 0, "Ev Idx": 25 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367696303.475, "dur": 8.338, + "args": { + "External id": 293403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367696306.671, "dur": 4.697, + "args": { + "External id": 293404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367696307.555, "dur": 3.678, + "args": { + "External id": 293405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367696317.839, "dur": 20.012, + "args": { + "External id": 293406,"Record function id": 0, "Sequence number": 1209228, "Fwd thread id": 1, "Ev Idx": 29 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367696319.181, "dur": 15.530, + "args": { + "External id": 293407,"Sequence number": 1209228, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 30 + } + }, + { + "ph": "f", "id": 4, "pid": 2070547, "tid": 2107622, "ts": 5333367696319.181, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070547, "tid": 2107622, + "ts": 5333367696325.314, "dur": 9.104, + "args": { + "External id": 293408,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367696328.989, "dur": 5.256, + "args": { + "External id": 293409,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367696342.159, "dur": 188.976, + "args": { + "External id": 293410,"Record function id": 0, "Sequence number": 1209227, "Fwd thread id": 1, "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367696352.175, "dur": 172.052, + "args": { + "External id": 293411,"Sequence number": 1209227, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 34 + } + }, + { + "ph": "f", "id": 5, "pid": 2070547, "tid": 2107622, "ts": 5333367696352.175, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367696394.376, "dur": 3.588, + "args": { + "External id": 293412,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367696400.262, "dur": 47.855, + "args": { + "External id": 293413,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367696401.040, "dur": 46.840, + "args": { + "External id": 293414,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367696402.427, "dur": 6.893, + "args": { + "External id": 293415,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367696410.340, "dur": 36.711, + "args": { + "External id": 293416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 39 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070547, "tid": 2107622, + "ts": 5333367696449.971, "dur": 0.384, + "args": { + "External id": 293417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070547, "tid": 2107622, + "ts": 5333367696451.397, "dur": 3.444, + "args": { + "External id": 293418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070547, "tid": 2107622, + "ts": 5333367696454.042, "dur": 0.651, + "args": { + "External id": 293419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070547, "tid": 2107622, + "ts": 5333367696457.338, "dur": 18.332, + "args": { + "External id": 293420,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070547, "tid": 2107622, + "ts": 5333367696479.726, "dur": 37.817, + "args": { + "External id": 293421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070547, "tid": 2107622, + "ts": 5333367696480.857, "dur": 36.506, + "args": { + "External id": 293422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070547, "tid": 2107622, + "ts": 5333367696484.318, "dur": 32.801, + "args": { + "External id": 293423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367696537.159, "dur": 7.306, + "args": { + "External id": 293424,"Record function id": 0, "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367696539.431, "dur": 4.404, + "args": { + "External id": 293425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367696540.620, "dur": 2.168, + "args": { + "External id": 293426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 49 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367696541.243, "dur": 1.461, + "args": { + "External id": 293427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367696548.363, "dur": 44.148, + "args": { + "External id": 293428,"Record function id": 0, "Sequence number": 1209226, "Fwd thread id": 1, "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070547, "tid": 2107622, + "ts": 5333367696549.339, "dur": 7.071, + "args": { + "External id": 293429,"Sequence number": 1209226, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 52 + } + }, + { + "ph": "f", "id": 6, "pid": 2070547, "tid": 2107622, "ts": 5333367696549.339, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070547, "tid": 2107622, + "ts": 5333367696551.305, "dur": 4.929, + "args": { + "External id": 293430,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367696554.308, "dur": 1.796, + "args": { + "External id": 293431,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070547, "tid": 2107622, + "ts": 5333367696560.539, "dur": 24.392, + "args": { + "External id": 293432,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367696598.136, "dur": 379.967, + "args": { + "External id": 293433,"Record function id": 0, "Sequence number": 1209225, "Fwd thread id": 1, "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367696599.442, "dur": 367.863, + "args": { + "External id": 293434,"Sequence number": 1209225, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 57 + } + }, + { + "ph": "f", "id": 7, "pid": 2070547, "tid": 2107622, "ts": 5333367696599.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367696790.454, "dur": 49.603, + "args": { + "External id": 293435,"kernel_hash": "c2likoimxa3yi6j6dwntng3kzuvmiu64h4ync66w4mrx4v7x6wto", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2l/c2likoimxa3yi6j6dwntng3kzuvmiu64h4ync66w4mrx4v7x6wto.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367696878.622, "dur": 27.453, + "args": { + "External id": 293436,"kernel_hash": "c2mqya6ilpprpgchmqenn6k5mpg3mienn3ill2mhoh2uq42xagjg", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2m/c2mqya6ilpprpgchmqenn6k5mpg3mienn3ill2mhoh2uq42xagjg.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 59 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367696923.231, "dur": 20.836, + "args": { + "External id": 293437,"kernel_hash": "cstbyq2jrfkh2pvulirtdouqrjc47q3sh3z3auux63nbg6d6xj43", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/st/cstbyq2jrfkh2pvulirtdouqrjc47q3sh3z3auux63nbg6d6xj43.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367696987.565, "dur": 9.201, + "args": { + "External id": 293438,"Record function id": 0, "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367696989.715, "dur": 6.393, + "args": { + "External id": 293439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367696991.908, "dur": 3.463, + "args": { + "External id": 293440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367696992.835, "dur": 2.446, + "args": { + "External id": 293441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367697001.351, "dur": 2819.124, + "args": { + "External id": 293442,"Record function id": 0, "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.31)", "pid": 2070547, "tid": 2107622, + "ts": 5333367697027.526, "dur": 969.972, + "args": { + "External id": 293443,"Record function id": 0, "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.30", "pid": 2070547, "tid": 2107622, + "ts": 5333367697052.050, "dur": 936.450, + "args": { + "External id": 293444,"Record function id": 0, "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070547, "tid": 2107622, + "ts": 5333367697067.367, "dur": 905.429, + "args": { + "External id": 293445,"Record function id": 0, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367697161.511, "dur": 22.886, + "args": { + "External id": 293446,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 69 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367697202.220, "dur": 40.078, + "args": { + "External id": 293447,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697208.468, "dur": 3.974, + "args": { + "External id": 293448,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697213.673, "dur": 0.611, + "args": { + "External id": 293449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697215.118, "dur": 3.013, + "args": { + "External id": 293450,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697222.921, "dur": 0.564, + "args": { + "External id": 293451,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 74 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697224.232, "dur": 0.768, + "args": { + "External id": 293452,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697225.635, "dur": 0.336, + "args": { + "External id": 293453,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697230.772, "dur": 0.381, + "args": { + "External id": 293454,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697231.658, "dur": 0.615, + "args": { + "External id": 293455,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697234.712, "dur": 2.109, + "args": { + "External id": 293456,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367697254.969, "dur": 37.829, + "args": { + "External id": 293457,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367697335.371, "dur": 117.591, + "args": { + "External id": 293458,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367697348.238, "dur": 5.009, + "args": { + "External id": 293459,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367697358.618, "dur": 14.285, + "args": { + "External id": 293460,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367697363.403, "dur": 9.069, + "args": { + "External id": 293461,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697368.305, "dur": 2.869, + "args": { + "External id": 293462,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367697379.169, "dur": 27.427, + "args": { + "External id": 293463,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697380.441, "dur": 0.589, + "args": { + "External id": 293464,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697383.674, "dur": 0.634, + "args": { + "External id": 293465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697384.892, "dur": 2.203, + "args": { + "External id": 293466,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 89 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697387.584, "dur": 0.832, + "args": { + "External id": 293467,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697390.473, "dur": 0.581, + "args": { + "External id": 293468,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697393.536, "dur": 0.383, + "args": { + "External id": 293469,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697396.123, "dur": 0.838, + "args": { + "External id": 293470,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 93 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697397.432, "dur": 2.615, + "args": { + "External id": 293471,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367697402.436, "dur": 0.386, + "args": { + "External id": 293472,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367697419.444, "dur": 25.457, + "args": { + "External id": 293473,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367697518.027, "dur": 346.912, + "args": { + "External id": 293474,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367697549.169, "dur": 310.345, + "args": { + "External id": 293475,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 98, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367697560.807, "dur": 292.077, + "args": { + "External id": 293476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367697892.021, "dur": 2.296, + "args": { + "External id": 293477,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 100, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367698006.151, "dur": 1787.611, + "args": { + "External id": 293478,"Sequence number": 1209224, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 101 + } + }, + { + "ph": "f", "id": 8, "pid": 2070547, "tid": 2107622, "ts": 5333367698006.151, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698129.143, "dur": 140.207, + "args": { + "External id": 293479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367698317.350, "dur": 43.809, + "args": { + "External id": 293480,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698383.178, "dur": 58.107, + "args": { + "External id": 293481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698451.166, "dur": 35.898, + "args": { + "External id": 293482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698497.884, "dur": 45.841, + "args": { + "External id": 293483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698550.599, "dur": 28.471, + "args": { + "External id": 293484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698588.676, "dur": 81.403, + "args": { + "External id": 293485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367698702.405, "dur": 26.310, + "args": { + "External id": 293486,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367698750.580, "dur": 30.566, + "args": { + "External id": 293487,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367698806.462, "dur": 21.911, + "args": { + "External id": 293488,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367698851.752, "dur": 16.186, + "args": { + "External id": 293489,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698879.955, "dur": 38.510, + "args": { + "External id": 293490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367698921.721, "dur": 33.549, + "args": { + "External id": 293491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367698987.397, "dur": 211.899, + "args": { + "External id": 293492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367699083.666, "dur": 5.658, + "args": { + "External id": 293493,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367699091.034, "dur": 1.936, + "args": { + "External id": 293494,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367699238.827, "dur": 28.511, + "args": { + "External id": 293495,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367699280.980, "dur": 16.124, + "args": { + "External id": 293496,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367699308.518, "dur": 39.502, + "args": { + "External id": 293497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367699354.159, "dur": 35.738, + "args": { + "External id": 293498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367699396.911, "dur": 22.301, + "args": { + "External id": 293499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367699424.086, "dur": 35.349, + "args": { + "External id": 293500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367699466.737, "dur": 21.291, + "args": { + "External id": 293501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367699494.277, "dur": 28.720, + "args": { + "External id": 293502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367699544.893, "dur": 24.449, + "args": { + "External id": 293503,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367699588.930, "dur": 27.805, + "args": { + "External id": 293504,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367699672.139, "dur": 21.273, + "args": { + "External id": 293505,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367699712.101, "dur": 14.413, + "args": { + "External id": 293506,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367699743.334, "dur": 17.942, + "args": { + "External id": 293507,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699842.762, "dur": 14.713, + "args": { + "External id": 293508,"Record function id": 0, "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699845.945, "dur": 10.534, + "args": { + "External id": 293509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699850.193, "dur": 5.251, + "args": { + "External id": 293510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699851.677, "dur": 3.663, + "args": { + "External id": 293511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699861.252, "dur": 4.373, + "args": { + "External id": 293512,"Record function id": 0, "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699862.534, "dur": 2.700, + "args": { + "External id": 293513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699863.212, "dur": 1.402, + "args": { + "External id": 293514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699863.727, "dur": 0.820, + "args": { + "External id": 293515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699868.901, "dur": 4.614, + "args": { + "External id": 293516,"Record function id": 0, "Ev Idx": 139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699870.455, "dur": 2.648, + "args": { + "External id": 293517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699871.040, "dur": 1.544, + "args": { + "External id": 293518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699871.781, "dur": 0.706, + "args": { + "External id": 293519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699876.751, "dur": 3.916, + "args": { + "External id": 293520,"Record function id": 0, "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699877.771, "dur": 2.512, + "args": { + "External id": 293521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699878.288, "dur": 1.448, + "args": { + "External id": 293522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699878.766, "dur": 0.902, + "args": { + "External id": 293523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699883.788, "dur": 5.684, + "args": { + "External id": 293524,"Record function id": 0, "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699887.077, "dur": 1.982, + "args": { + "External id": 293525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699887.576, "dur": 1.032, + "args": { + "External id": 293526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699887.904, "dur": 0.629, + "args": { + "External id": 293527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699895.687, "dur": 3.222, + "args": { + "External id": 293528,"Record function id": 0, "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699896.534, "dur": 1.989, + "args": { + "External id": 293529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699896.990, "dur": 1.105, + "args": { + "External id": 293530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699897.301, "dur": 0.720, + "args": { + "External id": 293531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699902.110, "dur": 7.986, + "args": { + "External id": 293532,"Record function id": 0, "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699903.265, "dur": 6.441, + "args": { + "External id": 293533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699903.711, "dur": 5.431, + "args": { + "External id": 293534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699905.972, "dur": 3.102, + "args": { + "External id": 293535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699913.152, "dur": 3.418, + "args": { + "External id": 293536,"Record function id": 0, "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699913.990, "dur": 2.180, + "args": { + "External id": 293537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699914.460, "dur": 1.156, + "args": { + "External id": 293538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699914.730, "dur": 0.826, + "args": { + "External id": 293539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699920.393, "dur": 4.285, + "args": { + "External id": 293540,"Record function id": 0, "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367699921.681, "dur": 2.615, + "args": { + "External id": 293541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699922.295, "dur": 1.320, + "args": { + "External id": 293542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367699922.756, "dur": 0.792, + "args": { + "External id": 293543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367699928.783, "dur": 36753.243, + "args": { + "External id": 293544,"Record function id": 0, "Sequence number": 1209223, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367699930.410, "dur": 36741.477, + "args": { + "External id": 293545,"Sequence number": 1209223, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 9, "pid": 2070547, "tid": 2107622, "ts": 5333367699930.410, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.31)", "pid": 2070547, "tid": 2107622, + "ts": 5333367699961.840, "dur": 41.606, + "args": { + "External id": 293546,"Record function id": 0, "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.31)", "pid": 2070547, "tid": 2107622, + "ts": 5333367700011.225, "dur": 87.270, + "args": { + "External id": 293547,"Record function id": 0, "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.31)", "pid": 2070547, "tid": 2107622, + "ts": 5333367700105.204, "dur": 36557.079, + "args": { + "External id": 293548,"Record function id": 0, "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367700183.998, "dur": 8.245, + "args": { + "External id": 293549,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367700202.984, "dur": 5.124, + "args": { + "External id": 293550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367700228.188, "dur": 35562.632, + "args": { + "External id": 293551,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367700244.539, "dur": 35536.287, + "args": { + "External id": 293552,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367700333.905, "dur": 4.218, + "args": { + "External id": 293553,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367700348.440, "dur": 35392.351, + "args": { + "External id": 293554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367700353.071, "dur": 35387.037, + "args": { + "External id": 293555,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367700356.083, "dur": 5.103, + "args": { + "External id": 293556,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367700362.713, "dur": 35373.798, + "args": { + "External id": 293557,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367735890.076, "dur": 9.837, + "args": { + "External id": 293558,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367735893.144, "dur": 6.362, + "args": { + "External id": 293559,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367735934.066, "dur": 384.367, + "args": { + "External id": 293560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367735967.483, "dur": 345.926, + "args": { + "External id": 293561,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 184, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367735980.075, "dur": 327.456, + "args": { + "External id": 293562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367736341.506, "dur": 2.008, + "args": { + "External id": 293563,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 186, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736405.838, "dur": 8.805, + "args": { + "External id": 293564,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736466.288, "dur": 1.210, + "args": { + "External id": 293565,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736483.027, "dur": 1.145, + "args": { + "External id": 293566,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736496.598, "dur": 0.927, + "args": { + "External id": 293567,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736508.490, "dur": 2.739, + "args": { + "External id": 293568,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736523.253, "dur": 0.992, + "args": { + "External id": 293569,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736537.660, "dur": 1.251, + "args": { + "External id": 293570,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736552.594, "dur": 2.062, + "args": { + "External id": 293571,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736564.344, "dur": 3.219, + "args": { + "External id": 293572,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367736698.735, "dur": 2847.555, + "args": { + "External id": 293573,"Record function id": 0, "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.30)", "pid": 2070547, "tid": 2107622, + "ts": 5333367736719.054, "dur": 1100.404, + "args": { + "External id": 293574,"Record function id": 0, "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070547, "tid": 2107622, + "ts": 5333367736734.462, "dur": 360.721, + "args": { + "External id": 293575,"Record function id": 0, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736825.666, "dur": 4.850, + "args": { + "External id": 293576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736833.903, "dur": 1.153, + "args": { + "External id": 293577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736836.649, "dur": 1.151, + "args": { + "External id": 293578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736841.432, "dur": 1.365, + "args": { + "External id": 293579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736845.760, "dur": 1.394, + "args": { + "External id": 293580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736848.359, "dur": 0.559, + "args": { + "External id": 293581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736850.192, "dur": 3.944, + "args": { + "External id": 293582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736860.675, "dur": 0.997, + "args": { + "External id": 293583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736862.879, "dur": 0.847, + "args": { + "External id": 293584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367736865.009, "dur": 0.732, + "args": { + "External id": 293585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367736885.404, "dur": 178.527, + "args": { + "External id": 293586,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367736906.408, "dur": 153.085, + "args": { + "External id": 293587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367736939.070, "dur": 13.511, + "args": { + "External id": 293588,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367736957.229, "dur": 71.477, + "args": { + "External id": 293589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367736959.898, "dur": 68.432, + "args": { + "External id": 293590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367736963.053, "dur": 7.525, + "args": { + "External id": 293591,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367736972.168, "dur": 55.432, + "args": { + "External id": 293592,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.29", "pid": 2070547, "tid": 2107622, + "ts": 5333367737204.115, "dur": 607.620, + "args": { + "External id": 293593,"Record function id": 0, "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070547, "tid": 2107622, + "ts": 5333367737220.554, "dur": 577.620, + "args": { + "External id": 293594,"Record function id": 0, "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367737277.400, "dur": 6.190, + "args": { + "External id": 293595,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367737300.354, "dur": 42.848, + "args": { + "External id": 293596,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737304.486, "dur": 1.382, + "args": { + "External id": 293597,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737307.323, "dur": 2.426, + "args": { + "External id": 293598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737311.718, "dur": 0.626, + "args": { + "External id": 293599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737315.339, "dur": 0.648, + "args": { + "External id": 293600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737316.519, "dur": 0.564, + "args": { + "External id": 293601,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737318.972, "dur": 2.762, + "args": { + "External id": 293602,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737332.122, "dur": 0.507, + "args": { + "External id": 293603,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737335.331, "dur": 0.581, + "args": { + "External id": 293604,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737338.092, "dur": 0.383, + "args": { + "External id": 293605,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367737353.351, "dur": 35.585, + "args": { + "External id": 293606,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367737423.937, "dur": 100.176, + "args": { + "External id": 293607,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367737433.518, "dur": 5.597, + "args": { + "External id": 293608,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367737443.966, "dur": 9.085, + "args": { + "External id": 293609,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367737448.055, "dur": 4.573, + "args": { + "External id": 293610,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737450.823, "dur": 0.675, + "args": { + "External id": 293611,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367737459.769, "dur": 25.296, + "args": { + "External id": 293612,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737462.611, "dur": 0.293, + "args": { + "External id": 293613,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737463.675, "dur": 0.716, + "args": { + "External id": 293614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737466.453, "dur": 0.564, + "args": { + "External id": 293615,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737468.966, "dur": 0.538, + "args": { + "External id": 293616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737470.065, "dur": 1.647, + "args": { + "External id": 293617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737473.933, "dur": 0.532, + "args": { + "External id": 293618,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737475.177, "dur": 2.514, + "args": { + "External id": 293619,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737479.443, "dur": 0.164, + "args": { + "External id": 293620,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367737481.237, "dur": 0.358, + "args": { + "External id": 293621,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367737496.957, "dur": 19.564, + "args": { + "External id": 293622,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367737565.987, "dur": 159.657, + "args": { + "External id": 293623,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367737596.584, "dur": 125.206, + "args": { + "External id": 293624,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 247, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367737605.033, "dur": 112.053, + "args": { + "External id": 293625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367737744.732, "dur": 1.924, + "args": { + "External id": 293626,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 249, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367737826.780, "dur": 1695.321, + "args": { + "External id": 293627,"Sequence number": 1209222, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 250 + } + }, + { + "ph": "f", "id": 10, "pid": 2070547, "tid": 2107622, "ts": 5333367737826.780, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367737938.782, "dur": 103.801, + "args": { + "External id": 293628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367738080.857, "dur": 44.464, + "args": { + "External id": 293629,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367738143.360, "dur": 71.632, + "args": { + "External id": 293630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367738228.877, "dur": 34.190, + "args": { + "External id": 293631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367738269.327, "dur": 44.109, + "args": { + "External id": 293632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367738322.143, "dur": 27.039, + "args": { + "External id": 293633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367738356.521, "dur": 40.835, + "args": { + "External id": 293634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367738424.857, "dur": 26.476, + "args": { + "External id": 293635,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367738469.695, "dur": 32.257, + "args": { + "External id": 293636,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367738523.412, "dur": 25.499, + "args": { + "External id": 293637,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367738560.185, "dur": 15.329, + "args": { + "External id": 293638,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367738586.111, "dur": 67.576, + "args": { + "External id": 293639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367738659.886, "dur": 38.543, + "args": { + "External id": 293640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367738728.395, "dur": 203.631, + "args": { + "External id": 293641,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367738815.715, "dur": 5.454, + "args": { + "External id": 293642,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367738822.908, "dur": 4.781, + "args": { + "External id": 293643,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367738971.297, "dur": 27.686, + "args": { + "External id": 293644,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367739012.563, "dur": 16.032, + "args": { + "External id": 293645,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367739036.450, "dur": 37.167, + "args": { + "External id": 293646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367739079.272, "dur": 35.428, + "args": { + "External id": 293647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367739121.102, "dur": 22.732, + "args": { + "External id": 293648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367739150.050, "dur": 44.945, + "args": { + "External id": 293649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367739203.939, "dur": 38.421, + "args": { + "External id": 293650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367739255.762, "dur": 35.551, + "args": { + "External id": 293651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367739315.309, "dur": 25.152, + "args": { + "External id": 293652,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367739361.507, "dur": 23.816, + "args": { + "External id": 293653,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367739404.877, "dur": 18.462, + "args": { + "External id": 293654,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367739439.140, "dur": 17.401, + "args": { + "External id": 293655,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367739473.117, "dur": 18.531, + "args": { + "External id": 293656,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739567.673, "dur": 14.537, + "args": { + "External id": 293657,"Record function id": 0, "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739571.186, "dur": 10.092, + "args": { + "External id": 293658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739575.190, "dur": 5.246, + "args": { + "External id": 293659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739576.572, "dur": 3.775, + "args": { + "External id": 293660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739586.072, "dur": 4.484, + "args": { + "External id": 293661,"Record function id": 0, "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739587.358, "dur": 2.787, + "args": { + "External id": 293662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739588.465, "dur": 1.220, + "args": { + "External id": 293663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739588.830, "dur": 0.760, + "args": { + "External id": 293664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739593.985, "dur": 9.167, + "args": { + "External id": 293665,"Record function id": 0, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739599.957, "dur": 2.791, + "args": { + "External id": 293666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739600.552, "dur": 1.609, + "args": { + "External id": 293667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739601.175, "dur": 0.892, + "args": { + "External id": 293668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739606.291, "dur": 6.672, + "args": { + "External id": 293669,"Record function id": 0, "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739607.537, "dur": 4.986, + "args": { + "External id": 293670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739608.189, "dur": 3.774, + "args": { + "External id": 293671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739608.874, "dur": 2.994, + "args": { + "External id": 293672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739616.075, "dur": 41.126, + "args": { + "External id": 293673,"Record function id": 0, "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739617.108, "dur": 38.777, + "args": { + "External id": 293674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739618.028, "dur": 36.745, + "args": { + "External id": 293675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739652.748, "dur": 1.733, + "args": { + "External id": 293676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739662.745, "dur": 5.121, + "args": { + "External id": 293677,"Record function id": 0, "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739664.173, "dur": 3.297, + "args": { + "External id": 293678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739665.448, "dur": 1.433, + "args": { + "External id": 293679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739665.910, "dur": 0.908, + "args": { + "External id": 293680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739671.196, "dur": 6.437, + "args": { + "External id": 293681,"Record function id": 0, "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739672.358, "dur": 4.876, + "args": { + "External id": 293682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739673.693, "dur": 3.110, + "args": { + "External id": 293683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739676.033, "dur": 0.682, + "args": { + "External id": 293684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739680.696, "dur": 3.886, + "args": { + "External id": 293685,"Record function id": 0, "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739681.980, "dur": 2.191, + "args": { + "External id": 293686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739682.620, "dur": 1.009, + "args": { + "External id": 293687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739682.932, "dur": 0.613, + "args": { + "External id": 293688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739687.718, "dur": 3.619, + "args": { + "External id": 293689,"Record function id": 0, "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367739688.946, "dur": 2.022, + "args": { + "External id": 293690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739689.410, "dur": 0.993, + "args": { + "External id": 293691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367739689.662, "dur": 0.673, + "args": { + "External id": 293692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367739696.589, "dur": 36869.343, + "args": { + "External id": 293693,"Record function id": 0, "Sequence number": 1209221, "Fwd thread id": 1, "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367739697.977, "dur": 36859.070, + "args": { + "External id": 293694,"Sequence number": 1209221, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 317 + } + }, + { + "ph": "f", "id": 11, "pid": 2070547, "tid": 2107622, "ts": 5333367739697.977, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.30)", "pid": 2070547, "tid": 2107622, + "ts": 5333367739728.022, "dur": 41.204, + "args": { + "External id": 293695,"Record function id": 0, "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.30)", "pid": 2070547, "tid": 2107622, + "ts": 5333367739776.836, "dur": 64.112, + "args": { + "External id": 293696,"Record function id": 0, "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.30)", "pid": 2070547, "tid": 2107622, + "ts": 5333367739849.621, "dur": 36700.235, + "args": { + "External id": 293697,"Record function id": 0, "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367739937.047, "dur": 7.008, + "args": { + "External id": 293698,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367739953.411, "dur": 4.643, + "args": { + "External id": 293699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367739972.403, "dur": 35741.923, + "args": { + "External id": 293700,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367739985.784, "dur": 35719.252, + "args": { + "External id": 293701,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367740041.512, "dur": 16.449, + "args": { + "External id": 293702,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367740065.051, "dur": 35601.080, + "args": { + "External id": 293703,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367740068.456, "dur": 35596.986, + "args": { + "External id": 293704,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367740071.595, "dur": 4.759, + "args": { + "External id": 293705,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367740078.030, "dur": 35583.954, + "args": { + "External id": 293706,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367775803.087, "dur": 10.027, + "args": { + "External id": 293707,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367775805.975, "dur": 6.743, + "args": { + "External id": 293708,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367775842.825, "dur": 399.552, + "args": { + "External id": 293709,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367775875.650, "dur": 361.580, + "args": { + "External id": 293710,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 333, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367775887.525, "dur": 343.497, + "args": { + "External id": 293711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367776266.572, "dur": 2.303, + "args": { + "External id": 293712,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 335, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776328.232, "dur": 6.439, + "args": { + "External id": 293713,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776381.874, "dur": 1.382, + "args": { + "External id": 293714,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776401.007, "dur": 1.327, + "args": { + "External id": 293715,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776418.128, "dur": 1.361, + "args": { + "External id": 293716,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776432.013, "dur": 0.774, + "args": { + "External id": 293717,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776444.711, "dur": 0.781, + "args": { + "External id": 293718,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776459.233, "dur": 1.079, + "args": { + "External id": 293719,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776474.088, "dur": 2.172, + "args": { + "External id": 293720,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776487.449, "dur": 1.070, + "args": { + "External id": 293721,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367776579.974, "dur": 2794.416, + "args": { + "External id": 293722,"Record function id": 0, "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.29)", "pid": 2070547, "tid": 2107622, + "ts": 5333367776598.608, "dur": 1084.688, + "args": { + "External id": 293723,"Record function id": 0, "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070547, "tid": 2107622, + "ts": 5333367776612.614, "dur": 356.275, + "args": { + "External id": 293724,"Record function id": 0, "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776727.020, "dur": 4.803, + "args": { + "External id": 293725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776735.165, "dur": 0.913, + "args": { + "External id": 293726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776737.846, "dur": 0.784, + "args": { + "External id": 293727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776742.364, "dur": 3.033, + "args": { + "External id": 293728,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776746.810, "dur": 1.010, + "args": { + "External id": 293729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776749.283, "dur": 1.128, + "args": { + "External id": 293730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776752.112, "dur": 2.022, + "args": { + "External id": 293731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776757.845, "dur": 0.832, + "args": { + "External id": 293732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776759.962, "dur": 0.949, + "args": { + "External id": 293733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367776762.186, "dur": 0.670, + "args": { + "External id": 293734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367776780.430, "dur": 160.061, + "args": { + "External id": 293735,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367776797.108, "dur": 138.579, + "args": { + "External id": 293736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367776815.423, "dur": 13.732, + "args": { + "External id": 293737,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367776832.883, "dur": 75.390, + "args": { + "External id": 293738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367776835.417, "dur": 72.538, + "args": { + "External id": 293739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367776839.206, "dur": 9.739, + "args": { + "External id": 293740,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367776850.514, "dur": 56.707, + "args": { + "External id": 293741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.28", "pid": 2070547, "tid": 2107622, + "ts": 5333367777048.025, "dur": 627.108, + "args": { + "External id": 293742,"Record function id": 0, "Ev Idx": 365 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070547, "tid": 2107622, + "ts": 5333367777065.408, "dur": 553.056, + "args": { + "External id": 293743,"Record function id": 0, "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367777118.548, "dur": 4.833, + "args": { + "External id": 293744,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367777138.654, "dur": 48.285, + "args": { + "External id": 293745,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777143.581, "dur": 1.398, + "args": { + "External id": 293746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777148.353, "dur": 0.463, + "args": { + "External id": 293747,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777150.205, "dur": 0.500, + "args": { + "External id": 293748,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777152.156, "dur": 1.533, + "args": { + "External id": 293749,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777155.352, "dur": 0.408, + "args": { + "External id": 293750,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777157.145, "dur": 0.354, + "args": { + "External id": 293751,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777160.513, "dur": 1.966, + "args": { + "External id": 293752,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777163.807, "dur": 0.628, + "args": { + "External id": 293753,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777181.151, "dur": 0.633, + "args": { + "External id": 293754,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367777197.880, "dur": 33.940, + "args": { + "External id": 293755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367777268.772, "dur": 110.849, + "args": { + "External id": 293756,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367777278.855, "dur": 4.220, + "args": { + "External id": 293757,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367777287.928, "dur": 11.467, + "args": { + "External id": 293758,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367777292.033, "dur": 6.950, + "args": { + "External id": 293759,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777295.353, "dur": 2.349, + "args": { + "External id": 293760,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367777306.474, "dur": 34.893, + "args": { + "External id": 293761,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777312.513, "dur": 0.394, + "args": { + "External id": 293762,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777314.748, "dur": 0.667, + "args": { + "External id": 293763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777316.750, "dur": 0.564, + "args": { + "External id": 293764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777320.482, "dur": 1.715, + "args": { + "External id": 293765,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777323.425, "dur": 0.387, + "args": { + "External id": 293766,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777325.367, "dur": 1.868, + "args": { + "External id": 293767,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777328.680, "dur": 0.513, + "args": { + "External id": 293768,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777333.519, "dur": 0.374, + "args": { + "External id": 293769,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367777336.947, "dur": 0.413, + "args": { + "External id": 293770,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367777352.832, "dur": 19.758, + "args": { + "External id": 293771,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367777424.361, "dur": 125.743, + "args": { + "External id": 293772,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367777456.404, "dur": 90.216, + "args": { + "External id": 293773,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 396, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367777465.868, "dur": 75.192, + "args": { + "External id": 293774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367777567.095, "dur": 1.990, + "args": { + "External id": 293775,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 398, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367777691.570, "dur": 1657.509, + "args": { + "External id": 293776,"Sequence number": 1209220, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 399 + } + }, + { + "ph": "f", "id": 12, "pid": 2070547, "tid": 2107622, "ts": 5333367777691.570, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367777806.199, "dur": 108.183, + "args": { + "External id": 293777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367777956.699, "dur": 39.692, + "args": { + "External id": 293778,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778013.017, "dur": 47.146, + "args": { + "External id": 293779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778069.330, "dur": 33.427, + "args": { + "External id": 293780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778110.854, "dur": 44.667, + "args": { + "External id": 293781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778161.634, "dur": 49.575, + "args": { + "External id": 293782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778221.806, "dur": 43.791, + "args": { + "External id": 293783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367778291.782, "dur": 23.217, + "args": { + "External id": 293784,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367778335.018, "dur": 26.176, + "args": { + "External id": 293785,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367778382.914, "dur": 19.820, + "args": { + "External id": 293786,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367778417.235, "dur": 13.654, + "args": { + "External id": 293787,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778437.882, "dur": 29.684, + "args": { + "External id": 293788,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778470.638, "dur": 31.364, + "args": { + "External id": 293789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367778534.348, "dur": 219.794, + "args": { + "External id": 293790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367778614.399, "dur": 44.837, + "args": { + "External id": 293791,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367778662.986, "dur": 2.638, + "args": { + "External id": 293792,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367778792.050, "dur": 26.165, + "args": { + "External id": 293793,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367778830.529, "dur": 14.945, + "args": { + "External id": 293794,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778853.504, "dur": 41.367, + "args": { + "External id": 293795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778904.150, "dur": 34.618, + "args": { + "External id": 293796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778947.984, "dur": 26.663, + "args": { + "External id": 293797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367778978.987, "dur": 28.995, + "args": { + "External id": 293798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367779013.568, "dur": 24.297, + "args": { + "External id": 293799,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367779043.961, "dur": 45.566, + "args": { + "External id": 293800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367779117.355, "dur": 29.089, + "args": { + "External id": 293801,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367779180.287, "dur": 32.734, + "args": { + "External id": 293802,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367779231.751, "dur": 17.641, + "args": { + "External id": 293803,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367779272.122, "dur": 14.193, + "args": { + "External id": 293804,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367779300.756, "dur": 15.284, + "args": { + "External id": 293805,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779399.004, "dur": 15.484, + "args": { + "External id": 293806,"Record function id": 0, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779402.390, "dur": 11.046, + "args": { + "External id": 293807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779406.745, "dur": 5.779, + "args": { + "External id": 293808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779408.434, "dur": 3.999, + "args": { + "External id": 293809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779418.333, "dur": 4.545, + "args": { + "External id": 293810,"Record function id": 0, "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779419.696, "dur": 2.782, + "args": { + "External id": 293811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779420.392, "dur": 1.667, + "args": { + "External id": 293812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779420.976, "dur": 0.999, + "args": { + "External id": 293813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779426.181, "dur": 7.322, + "args": { + "External id": 293814,"Record function id": 0, "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779427.473, "dur": 5.539, + "args": { + "External id": 293815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779428.180, "dur": 4.389, + "args": { + "External id": 293816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779429.046, "dur": 3.383, + "args": { + "External id": 293817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779436.669, "dur": 5.050, + "args": { + "External id": 293818,"Record function id": 0, "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779438.294, "dur": 2.974, + "args": { + "External id": 293819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779439.166, "dur": 1.726, + "args": { + "External id": 293820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779439.934, "dur": 0.888, + "args": { + "External id": 293821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779444.844, "dur": 7.423, + "args": { + "External id": 293822,"Record function id": 0, "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779449.486, "dur": 2.352, + "args": { + "External id": 293823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779449.938, "dur": 1.506, + "args": { + "External id": 293824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779450.437, "dur": 0.941, + "args": { + "External id": 293825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779455.330, "dur": 3.832, + "args": { + "External id": 293826,"Record function id": 0, "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779456.698, "dur": 2.089, + "args": { + "External id": 293827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779457.261, "dur": 1.131, + "args": { + "External id": 293828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779457.665, "dur": 0.661, + "args": { + "External id": 293829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779462.279, "dur": 3.407, + "args": { + "External id": 293830,"Record function id": 0, "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779463.502, "dur": 1.793, + "args": { + "External id": 293831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779463.907, "dur": 1.011, + "args": { + "External id": 293832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779464.179, "dur": 0.672, + "args": { + "External id": 293833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779468.767, "dur": 3.798, + "args": { + "External id": 293834,"Record function id": 0, "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779470.074, "dur": 2.111, + "args": { + "External id": 293835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779470.525, "dur": 1.098, + "args": { + "External id": 293836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779470.836, "dur": 0.721, + "args": { + "External id": 293837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779476.770, "dur": 5.962, + "args": { + "External id": 293838,"Record function id": 0, "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367779477.870, "dur": 4.451, + "args": { + "External id": 293839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779478.491, "dur": 3.441, + "args": { + "External id": 293840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367779481.174, "dur": 0.684, + "args": { + "External id": 293841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367779486.394, "dur": 37851.124, + "args": { + "External id": 293842,"Record function id": 0, "Sequence number": 1209219, "Fwd thread id": 1, "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367779487.835, "dur": 37840.532, + "args": { + "External id": 293843,"Sequence number": 1209219, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 466 + } + }, + { + "ph": "f", "id": 13, "pid": 2070547, "tid": 2107622, "ts": 5333367779487.835, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.29)", "pid": 2070547, "tid": 2107622, + "ts": 5333367779515.549, "dur": 35.825, + "args": { + "External id": 293844,"Record function id": 0, "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.29)", "pid": 2070547, "tid": 2107622, + "ts": 5333367779559.282, "dur": 57.677, + "args": { + "External id": 293845,"Record function id": 0, "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.29)", "pid": 2070547, "tid": 2107622, + "ts": 5333367779662.058, "dur": 37658.693, + "args": { + "External id": 293846,"Record function id": 0, "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367779750.682, "dur": 6.959, + "args": { + "External id": 293847,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367779767.325, "dur": 6.751, + "args": { + "External id": 293848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367779793.490, "dur": 36581.610, + "args": { + "External id": 293849,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367779807.336, "dur": 36558.318, + "args": { + "External id": 293850,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367779860.923, "dur": 13.890, + "args": { + "External id": 293851,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367779880.944, "dur": 36446.997, + "args": { + "External id": 293852,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367779883.310, "dur": 36443.891, + "args": { + "External id": 293853,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367779888.910, "dur": 4.878, + "args": { + "External id": 293854,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367779895.190, "dur": 36428.327, + "args": { + "External id": 293855,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367816477.052, "dur": 9.349, + "args": { + "External id": 293856,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367816479.956, "dur": 6.106, + "args": { + "External id": 293857,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367816519.407, "dur": 470.529, + "args": { + "External id": 293858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367816551.122, "dur": 433.316, + "args": { + "External id": 293859,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 482, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367816563.981, "dur": 414.173, + "args": { + "External id": 293860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367817016.049, "dur": 2.181, + "args": { + "External id": 293861,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 484, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817081.456, "dur": 6.621, + "args": { + "External id": 293862,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817133.731, "dur": 1.297, + "args": { + "External id": 293863,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817151.453, "dur": 3.628, + "args": { + "External id": 293864,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817184.045, "dur": 1.512, + "args": { + "External id": 293865,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817203.408, "dur": 0.920, + "args": { + "External id": 293866,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817216.276, "dur": 0.839, + "args": { + "External id": 293867,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817229.764, "dur": 3.388, + "args": { + "External id": 293868,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817243.930, "dur": 1.826, + "args": { + "External id": 293869,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817255.823, "dur": 0.969, + "args": { + "External id": 293870,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367817353.185, "dur": 2925.666, + "args": { + "External id": 293871,"Record function id": 0, "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.28)", "pid": 2070547, "tid": 2107622, + "ts": 5333367817373.578, "dur": 1143.717, + "args": { + "External id": 293872,"Record function id": 0, "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070547, "tid": 2107622, + "ts": 5333367817389.430, "dur": 447.913, + "args": { + "External id": 293873,"Record function id": 0, "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817463.160, "dur": 4.005, + "args": { + "External id": 293874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817470.524, "dur": 1.105, + "args": { + "External id": 293875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817473.388, "dur": 3.293, + "args": { + "External id": 293876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817478.375, "dur": 0.722, + "args": { + "External id": 293877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817480.591, "dur": 0.961, + "args": { + "External id": 293878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817485.102, "dur": 0.899, + "args": { + "External id": 293879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817487.749, "dur": 1.296, + "args": { + "External id": 293880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817490.418, "dur": 0.559, + "args": { + "External id": 293881,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817495.312, "dur": 0.705, + "args": { + "External id": 293882,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367817499.544, "dur": 0.999, + "args": { + "External id": 293883,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367817517.816, "dur": 282.896, + "args": { + "External id": 293884,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367817533.295, "dur": 261.898, + "args": { + "External id": 293885,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367817551.865, "dur": 17.692, + "args": { + "External id": 293886,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367817573.219, "dur": 192.853, + "args": { + "External id": 293887,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367817575.860, "dur": 189.632, + "args": { + "External id": 293888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367817698.437, "dur": 10.091, + "args": { + "External id": 293889,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367817711.048, "dur": 53.833, + "args": { + "External id": 293890,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.27", "pid": 2070547, "tid": 2107622, + "ts": 5333367817922.289, "dur": 586.588, + "args": { + "External id": 293891,"Record function id": 0, "Ev Idx": 514 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070547, "tid": 2107622, + "ts": 5333367817941.205, "dur": 555.086, + "args": { + "External id": 293892,"Record function id": 0, "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367818000.975, "dur": 4.942, + "args": { + "External id": 293893,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367818020.104, "dur": 36.590, + "args": { + "External id": 293894,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818025.392, "dur": 1.637, + "args": { + "External id": 293895,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818029.062, "dur": 1.627, + "args": { + "External id": 293896,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818032.060, "dur": 0.414, + "args": { + "External id": 293897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818033.854, "dur": 0.359, + "args": { + "External id": 293898,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818037.048, "dur": 0.302, + "args": { + "External id": 293899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818038.798, "dur": 2.531, + "args": { + "External id": 293900,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818042.965, "dur": 0.383, + "args": { + "External id": 293901,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818046.074, "dur": 0.362, + "args": { + "External id": 293902,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818048.102, "dur": 0.420, + "args": { + "External id": 293903,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367818066.698, "dur": 28.620, + "args": { + "External id": 293904,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367818127.858, "dur": 133.746, + "args": { + "External id": 293905,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367818142.660, "dur": 3.634, + "args": { + "External id": 293906,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367818151.077, "dur": 12.636, + "args": { + "External id": 293907,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367818157.767, "dur": 5.500, + "args": { + "External id": 293908,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818161.460, "dur": 0.473, + "args": { + "External id": 293909,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367818188.482, "dur": 32.130, + "args": { + "External id": 293910,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818190.959, "dur": 0.659, + "args": { + "External id": 293911,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818198.841, "dur": 0.350, + "args": { + "External id": 293912,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818200.597, "dur": 2.169, + "args": { + "External id": 293913,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818204.184, "dur": 1.984, + "args": { + "External id": 293914,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818207.719, "dur": 0.394, + "args": { + "External id": 293915,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818209.379, "dur": 0.380, + "args": { + "External id": 293916,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818212.090, "dur": 0.564, + "args": { + "External id": 293917,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818214.233, "dur": 0.399, + "args": { + "External id": 293918,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367818215.885, "dur": 0.340, + "args": { + "External id": 293919,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367818233.603, "dur": 20.637, + "args": { + "External id": 293920,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367818306.009, "dur": 119.277, + "args": { + "External id": 293921,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367818335.245, "dur": 86.623, + "args": { + "External id": 293922,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 545, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367818344.534, "dur": 73.481, + "args": { + "External id": 293923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367818442.797, "dur": 1.791, + "args": { + "External id": 293924,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 547, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367818524.010, "dur": 1729.419, + "args": { + "External id": 293925,"Sequence number": 1209218, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 548 + } + }, + { + "ph": "f", "id": 14, "pid": 2070547, "tid": 2107622, "ts": 5333367818524.010, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367818677.476, "dur": 109.432, + "args": { + "External id": 293926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367818827.761, "dur": 36.741, + "args": { + "External id": 293927,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367818902.418, "dur": 51.158, + "args": { + "External id": 293928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367818966.891, "dur": 34.092, + "args": { + "External id": 293929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819006.941, "dur": 45.957, + "args": { + "External id": 293930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819058.872, "dur": 28.321, + "args": { + "External id": 293931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819093.796, "dur": 43.745, + "args": { + "External id": 293932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367819188.454, "dur": 25.970, + "args": { + "External id": 293933,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367819238.523, "dur": 27.354, + "args": { + "External id": 293934,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367819291.932, "dur": 20.894, + "args": { + "External id": 293935,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367819326.534, "dur": 14.587, + "args": { + "External id": 293936,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819348.089, "dur": 33.844, + "args": { + "External id": 293937,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819385.184, "dur": 31.326, + "args": { + "External id": 293938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367819446.756, "dur": 169.628, + "args": { + "External id": 293939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367819525.684, "dur": 5.486, + "args": { + "External id": 293940,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367819533.001, "dur": 2.312, + "args": { + "External id": 293941,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367819688.840, "dur": 28.264, + "args": { + "External id": 293942,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367819731.110, "dur": 15.199, + "args": { + "External id": 293943,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819754.908, "dur": 40.942, + "args": { + "External id": 293944,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819801.726, "dur": 51.064, + "args": { + "External id": 293945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819868.196, "dur": 26.599, + "args": { + "External id": 293946,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819899.167, "dur": 30.990, + "args": { + "External id": 293947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819935.326, "dur": 23.169, + "args": { + "External id": 293948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367819966.038, "dur": 28.208, + "args": { + "External id": 293949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367820021.521, "dur": 22.588, + "args": { + "External id": 293950,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367820077.667, "dur": 24.770, + "args": { + "External id": 293951,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367820120.185, "dur": 17.992, + "args": { + "External id": 293952,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367820153.547, "dur": 30.599, + "args": { + "External id": 293953,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367820202.186, "dur": 17.863, + "args": { + "External id": 293954,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820301.300, "dur": 18.095, + "args": { + "External id": 293955,"Record function id": 0, "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820304.640, "dur": 13.774, + "args": { + "External id": 293956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820311.972, "dur": 5.462, + "args": { + "External id": 293957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820313.239, "dur": 4.105, + "args": { + "External id": 293958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820323.405, "dur": 7.406, + "args": { + "External id": 293959,"Record function id": 0, "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820324.797, "dur": 5.577, + "args": { + "External id": 293960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820325.805, "dur": 4.061, + "args": { + "External id": 293961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820326.381, "dur": 3.396, + "args": { + "External id": 293962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820334.077, "dur": 4.528, + "args": { + "External id": 293963,"Record function id": 0, "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820335.301, "dur": 2.857, + "args": { + "External id": 293964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820335.807, "dur": 1.905, + "args": { + "External id": 293965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820336.510, "dur": 1.100, + "args": { + "External id": 293966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820341.941, "dur": 4.255, + "args": { + "External id": 293967,"Record function id": 0, "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820343.342, "dur": 2.448, + "args": { + "External id": 293968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820344.227, "dur": 1.091, + "args": { + "External id": 293969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820344.555, "dur": 0.673, + "args": { + "External id": 293970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820349.303, "dur": 3.603, + "args": { + "External id": 293971,"Record function id": 0, "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820350.556, "dur": 1.947, + "args": { + "External id": 293972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820351.061, "dur": 1.009, + "args": { + "External id": 293973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820351.338, "dur": 0.665, + "args": { + "External id": 293974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820356.058, "dur": 4.092, + "args": { + "External id": 293975,"Record function id": 0, "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820357.542, "dur": 2.206, + "args": { + "External id": 293976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820358.102, "dur": 1.213, + "args": { + "External id": 293977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820358.554, "dur": 0.695, + "args": { + "External id": 293978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820363.330, "dur": 3.516, + "args": { + "External id": 293979,"Record function id": 0, "Ev Idx": 602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820364.665, "dur": 1.783, + "args": { + "External id": 293980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820365.101, "dur": 0.915, + "args": { + "External id": 293981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820365.384, "dur": 0.560, + "args": { + "External id": 293982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820369.908, "dur": 5.440, + "args": { + "External id": 293983,"Record function id": 0, "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820371.145, "dur": 3.768, + "args": { + "External id": 293984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820371.725, "dur": 2.753, + "args": { + "External id": 293985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820373.640, "dur": 0.740, + "args": { + "External id": 293986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820378.429, "dur": 5.713, + "args": { + "External id": 293987,"Record function id": 0, "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367820379.776, "dur": 3.960, + "args": { + "External id": 293988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820380.279, "dur": 2.975, + "args": { + "External id": 293989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367820380.527, "dur": 2.666, + "args": { + "External id": 293990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367820389.080, "dur": 35330.930, + "args": { + "External id": 293991,"Record function id": 0, "Sequence number": 1209217, "Fwd thread id": 1, "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367820390.708, "dur": 35319.308, + "args": { + "External id": 293992,"Sequence number": 1209217, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 615 + } + }, + { + "ph": "f", "id": 15, "pid": 2070547, "tid": 2107622, "ts": 5333367820390.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.28)", "pid": 2070547, "tid": 2107622, + "ts": 5333367820422.225, "dur": 39.839, + "args": { + "External id": 293993,"Record function id": 0, "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.28)", "pid": 2070547, "tid": 2107622, + "ts": 5333367820469.940, "dur": 58.631, + "args": { + "External id": 293994,"Record function id": 0, "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.28)", "pid": 2070547, "tid": 2107622, + "ts": 5333367820534.151, "dur": 35166.619, + "args": { + "External id": 293995,"Record function id": 0, "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367820661.533, "dur": 8.165, + "args": { + "External id": 293996,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367820697.320, "dur": 5.245, + "args": { + "External id": 293997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367820722.041, "dur": 34118.265, + "args": { + "External id": 293998,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367820735.957, "dur": 34094.843, + "args": { + "External id": 293999,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367820786.019, "dur": 13.398, + "args": { + "External id": 294000,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367820805.728, "dur": 33983.415, + "args": { + "External id": 294001,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367820809.383, "dur": 33978.907, + "args": { + "External id": 294002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367820813.455, "dur": 6.500, + "args": { + "External id": 294003,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367820821.598, "dur": 33963.155, + "args": { + "External id": 294004,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367854930.978, "dur": 10.203, + "args": { + "External id": 294005,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367854933.988, "dur": 6.773, + "args": { + "External id": 294006,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367854969.105, "dur": 381.457, + "args": { + "External id": 294007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367855000.761, "dur": 344.519, + "args": { + "External id": 294008,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 631, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367855013.408, "dur": 326.130, + "args": { + "External id": 294009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367855374.744, "dur": 2.178, + "args": { + "External id": 294010,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 633, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855435.736, "dur": 5.993, + "args": { + "External id": 294011,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855487.895, "dur": 3.639, + "args": { + "External id": 294012,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855512.259, "dur": 1.137, + "args": { + "External id": 294013,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855526.623, "dur": 0.695, + "args": { + "External id": 294014,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855541.637, "dur": 0.684, + "args": { + "External id": 294015,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855554.222, "dur": 2.728, + "args": { + "External id": 294016,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855572.179, "dur": 1.226, + "args": { + "External id": 294017,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855584.994, "dur": 2.206, + "args": { + "External id": 294018,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855598.230, "dur": 1.147, + "args": { + "External id": 294019,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367855736.328, "dur": 2774.271, + "args": { + "External id": 294020,"Record function id": 0, "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.27)", "pid": 2070547, "tid": 2107622, + "ts": 5333367855760.221, "dur": 1038.694, + "args": { + "External id": 294021,"Record function id": 0, "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070547, "tid": 2107622, + "ts": 5333367855776.006, "dur": 322.023, + "args": { + "External id": 294022,"Record function id": 0, "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855851.314, "dur": 6.038, + "args": { + "External id": 294023,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855860.979, "dur": 0.867, + "args": { + "External id": 294024,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855863.492, "dur": 1.173, + "args": { + "External id": 294025,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855866.460, "dur": 0.614, + "args": { + "External id": 294026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855868.409, "dur": 0.863, + "args": { + "External id": 294027,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855872.676, "dur": 0.913, + "args": { + "External id": 294028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855875.076, "dur": 1.633, + "args": { + "External id": 294029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855878.415, "dur": 0.583, + "args": { + "External id": 294030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855880.361, "dur": 2.710, + "args": { + "External id": 294031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367855886.136, "dur": 1.024, + "args": { + "External id": 294032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367855904.193, "dur": 163.526, + "args": { + "External id": 294033,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367855919.509, "dur": 142.842, + "args": { + "External id": 294034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367855947.465, "dur": 12.801, + "args": { + "External id": 294035,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367855964.166, "dur": 69.655, + "args": { + "External id": 294036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367855966.795, "dur": 66.713, + "args": { + "External id": 294037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367855971.758, "dur": 7.951, + "args": { + "External id": 294038,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367855981.254, "dur": 51.600, + "args": { + "External id": 294039,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.26", "pid": 2070547, "tid": 2107622, + "ts": 5333367856197.796, "dur": 593.373, + "args": { + "External id": 294040,"Record function id": 0, "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070547, "tid": 2107622, + "ts": 5333367856214.866, "dur": 563.098, + "args": { + "External id": 294041,"Record function id": 0, "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367856270.937, "dur": 5.921, + "args": { + "External id": 294042,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367856292.536, "dur": 34.032, + "args": { + "External id": 294043,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856297.668, "dur": 1.507, + "args": { + "External id": 294044,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856301.304, "dur": 1.974, + "args": { + "External id": 294045,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856304.859, "dur": 0.551, + "args": { + "External id": 294046,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856306.754, "dur": 2.509, + "args": { + "External id": 294047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856312.523, "dur": 0.300, + "args": { + "External id": 294048,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856314.443, "dur": 0.403, + "args": { + "External id": 294049,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856316.143, "dur": 0.639, + "args": { + "External id": 294050,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856319.641, "dur": 0.435, + "args": { + "External id": 294051,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856321.349, "dur": 0.772, + "args": { + "External id": 294052,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367856336.994, "dur": 30.859, + "args": { + "External id": 294053,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367856398.278, "dur": 102.550, + "args": { + "External id": 294054,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367856408.103, "dur": 3.344, + "args": { + "External id": 294055,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367856416.035, "dur": 11.736, + "args": { + "External id": 294056,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367856422.051, "dur": 5.311, + "args": { + "External id": 294057,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856425.515, "dur": 0.639, + "args": { + "External id": 294058,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367856434.302, "dur": 31.306, + "args": { + "External id": 294059,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856436.451, "dur": 2.761, + "args": { + "External id": 294060,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856442.540, "dur": 0.439, + "args": { + "External id": 294061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856444.477, "dur": 0.411, + "args": { + "External id": 294062,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856446.681, "dur": 1.106, + "args": { + "External id": 294063,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856449.168, "dur": 0.526, + "args": { + "External id": 294064,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856451.185, "dur": 0.511, + "args": { + "External id": 294065,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856455.378, "dur": 0.277, + "args": { + "External id": 294066,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856457.099, "dur": 0.479, + "args": { + "External id": 294067,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367856458.897, "dur": 2.416, + "args": { + "External id": 294068,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367856475.071, "dur": 18.289, + "args": { + "External id": 294069,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367856542.106, "dur": 161.113, + "args": { + "External id": 294070,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367856570.574, "dur": 129.148, + "args": { + "External id": 294071,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 694, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367856579.783, "dur": 115.273, + "args": { + "External id": 294072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367856721.984, "dur": 1.913, + "args": { + "External id": 294073,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 696, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367856806.283, "dur": 1682.168, + "args": { + "External id": 294074,"Sequence number": 1209216, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 697 + } + }, + { + "ph": "f", "id": 16, "pid": 2070547, "tid": 2107622, "ts": 5333367856806.283, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367856914.472, "dur": 103.403, + "args": { + "External id": 294075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367857059.327, "dur": 41.175, + "args": { + "External id": 294076,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857118.963, "dur": 71.294, + "args": { + "External id": 294077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857203.916, "dur": 41.095, + "args": { + "External id": 294078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857251.383, "dur": 47.746, + "args": { + "External id": 294079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857305.700, "dur": 27.889, + "args": { + "External id": 294080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857346.455, "dur": 40.775, + "args": { + "External id": 294081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367857416.270, "dur": 25.606, + "args": { + "External id": 294082,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367857461.447, "dur": 30.705, + "args": { + "External id": 294083,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367857514.833, "dur": 23.760, + "args": { + "External id": 294084,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367857554.553, "dur": 16.398, + "args": { + "External id": 294085,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857589.241, "dur": 63.811, + "args": { + "External id": 294086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857659.104, "dur": 38.657, + "args": { + "External id": 294087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367857725.844, "dur": 172.707, + "args": { + "External id": 294088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367857804.402, "dur": 7.025, + "args": { + "External id": 294089,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367857813.399, "dur": 2.514, + "args": { + "External id": 294090,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367857932.683, "dur": 27.303, + "args": { + "External id": 294091,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367857971.520, "dur": 15.243, + "args": { + "External id": 294092,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367857995.272, "dur": 40.292, + "args": { + "External id": 294093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367858042.778, "dur": 35.352, + "args": { + "External id": 294094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367858084.895, "dur": 22.891, + "args": { + "External id": 294095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367858112.104, "dur": 30.090, + "args": { + "External id": 294096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367858147.596, "dur": 52.307, + "args": { + "External id": 294097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367858220.993, "dur": 44.022, + "args": { + "External id": 294098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367858291.427, "dur": 28.354, + "args": { + "External id": 294099,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367858337.010, "dur": 23.397, + "args": { + "External id": 294100,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367858380.830, "dur": 17.732, + "args": { + "External id": 294101,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367858413.411, "dur": 18.083, + "args": { + "External id": 294102,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367858444.430, "dur": 15.295, + "args": { + "External id": 294103,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858532.421, "dur": 16.924, + "args": { + "External id": 294104,"Record function id": 0, "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858535.703, "dur": 12.707, + "args": { + "External id": 294105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858539.917, "dur": 7.599, + "args": { + "External id": 294106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858541.423, "dur": 5.960, + "args": { + "External id": 294107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858553.469, "dur": 4.971, + "args": { + "External id": 294108,"Record function id": 0, "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858555.011, "dur": 2.944, + "args": { + "External id": 294109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858555.659, "dur": 1.780, + "args": { + "External id": 294110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858556.428, "dur": 0.936, + "args": { + "External id": 294111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858561.661, "dur": 4.107, + "args": { + "External id": 294112,"Record function id": 0, "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858562.980, "dur": 2.394, + "args": { + "External id": 294113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858563.626, "dur": 1.352, + "args": { + "External id": 294114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858564.088, "dur": 0.802, + "args": { + "External id": 294115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858568.977, "dur": 3.744, + "args": { + "External id": 294116,"Record function id": 0, "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858570.358, "dur": 1.976, + "args": { + "External id": 294117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858570.894, "dur": 1.054, + "args": { + "External id": 294118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858571.191, "dur": 0.686, + "args": { + "External id": 294119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858575.881, "dur": 6.056, + "args": { + "External id": 294120,"Record function id": 0, "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858577.164, "dur": 4.343, + "args": { + "External id": 294121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858577.605, "dur": 3.339, + "args": { + "External id": 294122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858580.193, "dur": 0.644, + "args": { + "External id": 294123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858585.089, "dur": 4.169, + "args": { + "External id": 294124,"Record function id": 0, "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858586.691, "dur": 2.166, + "args": { + "External id": 294125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858587.421, "dur": 0.997, + "args": { + "External id": 294126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858587.703, "dur": 0.639, + "args": { + "External id": 294127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858592.594, "dur": 3.725, + "args": { + "External id": 294128,"Record function id": 0, "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858593.773, "dur": 2.112, + "args": { + "External id": 294129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858594.452, "dur": 1.047, + "args": { + "External id": 294130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858594.759, "dur": 0.673, + "args": { + "External id": 294131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858599.423, "dur": 5.655, + "args": { + "External id": 294132,"Record function id": 0, "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858600.610, "dur": 4.070, + "args": { + "External id": 294133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858601.057, "dur": 3.243, + "args": { + "External id": 294134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858601.647, "dur": 2.589, + "args": { + "External id": 294135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858609.415, "dur": 5.794, + "args": { + "External id": 294136,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367858610.659, "dur": 4.137, + "args": { + "External id": 294137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858611.200, "dur": 3.201, + "args": { + "External id": 294138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367858613.429, "dur": 0.883, + "args": { + "External id": 294139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367858655.904, "dur": 36368.936, + "args": { + "External id": 294140,"Record function id": 0, "Sequence number": 1209215, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367858658.372, "dur": 36357.677, + "args": { + "External id": 294141,"Sequence number": 1209215, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 17, "pid": 2070547, "tid": 2107622, "ts": 5333367858658.372, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.27)", "pid": 2070547, "tid": 2107622, + "ts": 5333367858692.353, "dur": 38.326, + "args": { + "External id": 294142,"Record function id": 0, "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.27)", "pid": 2070547, "tid": 2107622, + "ts": 5333367858738.331, "dur": 62.231, + "args": { + "External id": 294143,"Record function id": 0, "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.27)", "pid": 2070547, "tid": 2107622, + "ts": 5333367858806.049, "dur": 36202.418, + "args": { + "External id": 294144,"Record function id": 0, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367858900.214, "dur": 7.191, + "args": { + "External id": 294145,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367858916.500, "dur": 4.794, + "args": { + "External id": 294146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367858935.845, "dur": 35211.156, + "args": { + "External id": 294147,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367858952.285, "dur": 35185.722, + "args": { + "External id": 294148,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367859003.200, "dur": 13.680, + "args": { + "External id": 294149,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367859023.087, "dur": 35077.202, + "args": { + "External id": 294150,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367859025.410, "dur": 35074.171, + "args": { + "External id": 294151,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367859030.780, "dur": 5.120, + "args": { + "External id": 294152,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367859037.502, "dur": 35058.443, + "args": { + "External id": 294153,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367894248.231, "dur": 10.108, + "args": { + "External id": 294154,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367894251.390, "dur": 6.314, + "args": { + "External id": 294155,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367894287.278, "dur": 408.348, + "args": { + "External id": 294156,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367894317.954, "dur": 372.408, + "args": { + "External id": 294157,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 780, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367894330.101, "dur": 354.030, + "args": { + "External id": 294158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367894720.124, "dur": 2.436, + "args": { + "External id": 294159,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 782, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894784.726, "dur": 6.440, + "args": { + "External id": 294160,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894839.630, "dur": 1.349, + "args": { + "External id": 294161,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894857.333, "dur": 1.353, + "args": { + "External id": 294162,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894871.065, "dur": 0.982, + "args": { + "External id": 294163,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894885.544, "dur": 1.026, + "args": { + "External id": 294164,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894900.208, "dur": 0.777, + "args": { + "External id": 294165,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894915.425, "dur": 1.174, + "args": { + "External id": 294166,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894928.849, "dur": 1.719, + "args": { + "External id": 294167,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367894941.766, "dur": 0.670, + "args": { + "External id": 294168,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367895038.652, "dur": 2708.757, + "args": { + "External id": 294169,"Record function id": 0, "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.26)", "pid": 2070547, "tid": 2107622, + "ts": 5333367895057.853, "dur": 1008.402, + "args": { + "External id": 294170,"Record function id": 0, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070547, "tid": 2107622, + "ts": 5333367895071.968, "dur": 319.669, + "args": { + "External id": 294171,"Record function id": 0, "Ev Idx": 794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895144.522, "dur": 3.954, + "args": { + "External id": 294172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895151.620, "dur": 0.816, + "args": { + "External id": 294173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895156.020, "dur": 0.979, + "args": { + "External id": 294174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895158.774, "dur": 0.820, + "args": { + "External id": 294175,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895161.075, "dur": 0.711, + "args": { + "External id": 294176,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895163.327, "dur": 0.805, + "args": { + "External id": 294177,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895182.318, "dur": 2.587, + "args": { + "External id": 294178,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895187.900, "dur": 2.950, + "args": { + "External id": 294179,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895192.650, "dur": 0.624, + "args": { + "External id": 294180,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367895194.997, "dur": 0.552, + "args": { + "External id": 294181,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367895216.036, "dur": 146.670, + "args": { + "External id": 294182,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367895231.805, "dur": 126.708, + "args": { + "External id": 294183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367895250.014, "dur": 12.467, + "args": { + "External id": 294184,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367895266.156, "dur": 65.987, + "args": { + "External id": 294185,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367895268.951, "dur": 62.890, + "args": { + "External id": 294186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895274.146, "dur": 4.916, + "args": { + "External id": 294187,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367895280.470, "dur": 50.800, + "args": { + "External id": 294188,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2070547, "tid": 2107622, + "ts": 5333367895469.323, "dur": 589.538, + "args": { + "External id": 294189,"Record function id": 0, "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070547, "tid": 2107622, + "ts": 5333367895485.285, "dur": 560.885, + "args": { + "External id": 294190,"Record function id": 0, "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367895539.502, "dur": 4.579, + "args": { + "External id": 294191,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367895559.204, "dur": 29.813, + "args": { + "External id": 294192,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895563.738, "dur": 1.328, + "args": { + "External id": 294193,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895566.815, "dur": 1.430, + "args": { + "External id": 294194,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895569.757, "dur": 2.414, + "args": { + "External id": 294195,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895573.452, "dur": 0.476, + "args": { + "External id": 294196,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895576.639, "dur": 0.496, + "args": { + "External id": 294197,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895578.416, "dur": 0.460, + "args": { + "External id": 294198,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895580.345, "dur": 0.331, + "args": { + "External id": 294199,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895582.932, "dur": 0.400, + "args": { + "External id": 294200,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895584.517, "dur": 0.405, + "args": { + "External id": 294201,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367895601.197, "dur": 67.054, + "args": { + "External id": 294202,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367895702.498, "dur": 109.012, + "args": { + "External id": 294203,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367895712.941, "dur": 5.364, + "args": { + "External id": 294204,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367895723.598, "dur": 12.699, + "args": { + "External id": 294205,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367895727.930, "dur": 7.932, + "args": { + "External id": 294206,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895731.888, "dur": 2.410, + "args": { + "External id": 294207,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367895743.859, "dur": 26.943, + "args": { + "External id": 294208,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895746.167, "dur": 0.467, + "args": { + "External id": 294209,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895748.386, "dur": 1.311, + "args": { + "External id": 294210,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895751.052, "dur": 0.380, + "args": { + "External id": 294211,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895752.790, "dur": 0.537, + "args": { + "External id": 294212,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895755.970, "dur": 0.510, + "args": { + "External id": 294213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895757.837, "dur": 0.518, + "args": { + "External id": 294214,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895759.929, "dur": 0.495, + "args": { + "External id": 294215,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895762.900, "dur": 2.238, + "args": { + "External id": 294216,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367895766.529, "dur": 0.437, + "args": { + "External id": 294217,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367895785.209, "dur": 18.826, + "args": { + "External id": 294218,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367895859.588, "dur": 118.895, + "args": { + "External id": 294219,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367895889.990, "dur": 85.362, + "args": { + "External id": 294220,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 843, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367895899.269, "dur": 72.185, + "args": { + "External id": 294221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367895995.043, "dur": 1.835, + "args": { + "External id": 294222,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 845, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367896073.805, "dur": 1650.449, + "args": { + "External id": 294223,"Sequence number": 1209214, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 846 + } + }, + { + "ph": "f", "id": 18, "pid": 2070547, "tid": 2107622, "ts": 5333367896073.805, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896197.872, "dur": 111.745, + "args": { + "External id": 294224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367896349.820, "dur": 36.407, + "args": { + "External id": 294225,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896403.890, "dur": 46.914, + "args": { + "External id": 294226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896460.915, "dur": 32.288, + "args": { + "External id": 294227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896500.729, "dur": 47.982, + "args": { + "External id": 294228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896555.208, "dur": 30.543, + "args": { + "External id": 294229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896594.682, "dur": 78.112, + "args": { + "External id": 294230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367896701.791, "dur": 24.633, + "args": { + "External id": 294231,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367896745.529, "dur": 27.276, + "args": { + "External id": 294232,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367896794.132, "dur": 19.879, + "args": { + "External id": 294233,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367896827.430, "dur": 14.279, + "args": { + "External id": 294234,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896853.097, "dur": 32.348, + "args": { + "External id": 294235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367896889.069, "dur": 31.821, + "args": { + "External id": 294236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367896946.669, "dur": 172.771, + "args": { + "External id": 294237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367897023.159, "dur": 5.697, + "args": { + "External id": 294238,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367897030.619, "dur": 6.343, + "args": { + "External id": 294239,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367897149.293, "dur": 41.357, + "args": { + "External id": 294240,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367897211.753, "dur": 15.853, + "args": { + "External id": 294241,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367897236.658, "dur": 40.682, + "args": { + "External id": 294242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367897282.472, "dur": 33.797, + "args": { + "External id": 294243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367897324.324, "dur": 21.547, + "args": { + "External id": 294244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367897351.968, "dur": 29.023, + "args": { + "External id": 294245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367897386.160, "dur": 21.073, + "args": { + "External id": 294246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367897413.396, "dur": 28.274, + "args": { + "External id": 294247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367897477.626, "dur": 30.472, + "args": { + "External id": 294248,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367897527.880, "dur": 27.434, + "args": { + "External id": 294249,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367897570.786, "dur": 16.483, + "args": { + "External id": 294250,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367897603.784, "dur": 52.428, + "args": { + "External id": 294251,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367897674.532, "dur": 18.152, + "args": { + "External id": 294252,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897769.429, "dur": 17.111, + "args": { + "External id": 294253,"Record function id": 0, "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897772.854, "dur": 12.753, + "args": { + "External id": 294254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897777.323, "dur": 7.338, + "args": { + "External id": 294255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897778.922, "dur": 5.655, + "args": { + "External id": 294256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897790.528, "dur": 4.658, + "args": { + "External id": 294257,"Record function id": 0, "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897791.887, "dur": 2.886, + "args": { + "External id": 294258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897792.893, "dur": 1.399, + "args": { + "External id": 294259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897793.320, "dur": 0.857, + "args": { + "External id": 294260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897798.416, "dur": 4.268, + "args": { + "External id": 294261,"Record function id": 0, "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897799.721, "dur": 2.537, + "args": { + "External id": 294262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897800.567, "dur": 1.194, + "args": { + "External id": 294263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897801.040, "dur": 0.645, + "args": { + "External id": 294264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897805.871, "dur": 4.085, + "args": { + "External id": 294265,"Record function id": 0, "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897807.233, "dur": 2.347, + "args": { + "External id": 294266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897807.972, "dur": 1.147, + "args": { + "External id": 294267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897808.318, "dur": 0.723, + "args": { + "External id": 294268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897813.070, "dur": 3.434, + "args": { + "External id": 294269,"Record function id": 0, "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897814.154, "dur": 1.960, + "args": { + "External id": 294270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897814.637, "dur": 0.988, + "args": { + "External id": 294271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897814.918, "dur": 0.633, + "args": { + "External id": 294272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897819.920, "dur": 5.997, + "args": { + "External id": 294273,"Record function id": 0, "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897821.138, "dur": 4.359, + "args": { + "External id": 294274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897821.959, "dur": 3.090, + "args": { + "External id": 294275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897824.167, "dur": 0.770, + "args": { + "External id": 294276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897829.109, "dur": 3.845, + "args": { + "External id": 294277,"Record function id": 0, "Ev Idx": 900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897830.382, "dur": 2.141, + "args": { + "External id": 294278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897830.839, "dur": 1.254, + "args": { + "External id": 294279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897831.242, "dur": 0.764, + "args": { + "External id": 294280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897836.137, "dur": 5.040, + "args": { + "External id": 294281,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897837.171, "dur": 3.617, + "args": { + "External id": 294282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897837.643, "dur": 2.697, + "args": { + "External id": 294283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897837.903, "dur": 2.373, + "args": { + "External id": 294284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897845.609, "dur": 3.768, + "args": { + "External id": 294285,"Record function id": 0, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367897846.967, "dur": 2.009, + "args": { + "External id": 294286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897847.420, "dur": 1.112, + "args": { + "External id": 294287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367897847.772, "dur": 0.691, + "args": { + "External id": 294288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367897853.053, "dur": 37462.019, + "args": { + "External id": 294289,"Record function id": 0, "Sequence number": 1209213, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367897854.360, "dur": 37452.039, + "args": { + "External id": 294290,"Sequence number": 1209213, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 19, "pid": 2070547, "tid": 2107622, "ts": 5333367897854.360, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.26)", "pid": 2070547, "tid": 2107622, + "ts": 5333367897887.073, "dur": 36.364, + "args": { + "External id": 294291,"Record function id": 0, "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.26)", "pid": 2070547, "tid": 2107622, + "ts": 5333367897930.819, "dur": 63.480, + "args": { + "External id": 294292,"Record function id": 0, "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.26)", "pid": 2070547, "tid": 2107622, + "ts": 5333367898000.071, "dur": 37298.549, + "args": { + "External id": 294293,"Record function id": 0, "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367898086.940, "dur": 6.873, + "args": { + "External id": 294294,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367898102.902, "dur": 4.405, + "args": { + "External id": 294295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367898121.982, "dur": 36303.068, + "args": { + "External id": 294296,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367898135.037, "dur": 36281.122, + "args": { + "External id": 294297,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367898184.467, "dur": 14.583, + "args": { + "External id": 294298,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367898208.708, "dur": 36166.354, + "args": { + "External id": 294299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367898213.187, "dur": 36161.119, + "args": { + "External id": 294300,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367898218.001, "dur": 5.487, + "args": { + "External id": 294301,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367898224.989, "dur": 36145.624, + "args": { + "External id": 294302,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367934511.274, "dur": 9.501, + "args": { + "External id": 294303,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367934514.255, "dur": 6.164, + "args": { + "External id": 294304,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367934546.968, "dur": 441.355, + "args": { + "External id": 294305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367934574.687, "dur": 407.834, + "args": { + "External id": 294306,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 929, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367934586.182, "dur": 390.090, + "args": { + "External id": 294307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367935011.276, "dur": 2.109, + "args": { + "External id": 294308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 931, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935076.226, "dur": 6.237, + "args": { + "External id": 294309,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935128.759, "dur": 1.146, + "args": { + "External id": 294310,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935145.021, "dur": 1.273, + "args": { + "External id": 294311,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935157.381, "dur": 1.096, + "args": { + "External id": 294312,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935186.151, "dur": 1.281, + "args": { + "External id": 294313,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935201.864, "dur": 0.675, + "args": { + "External id": 294314,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935214.149, "dur": 1.085, + "args": { + "External id": 294315,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935226.278, "dur": 1.770, + "args": { + "External id": 294316,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935237.782, "dur": 0.813, + "args": { + "External id": 294317,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367935330.141, "dur": 2644.283, + "args": { + "External id": 294318,"Record function id": 0, "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2070547, "tid": 2107622, + "ts": 5333367935348.334, "dur": 980.938, + "args": { + "External id": 294319,"Record function id": 0, "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070547, "tid": 2107622, + "ts": 5333367935362.497, "dur": 334.922, + "args": { + "External id": 294320,"Record function id": 0, "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935434.237, "dur": 3.746, + "args": { + "External id": 294321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935441.105, "dur": 0.776, + "args": { + "External id": 294322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935443.947, "dur": 0.824, + "args": { + "External id": 294323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935447.777, "dur": 0.938, + "args": { + "External id": 294324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935450.290, "dur": 0.764, + "args": { + "External id": 294325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935452.510, "dur": 0.735, + "args": { + "External id": 294326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935454.687, "dur": 1.981, + "args": { + "External id": 294327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935458.910, "dur": 2.365, + "args": { + "External id": 294328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935462.614, "dur": 0.938, + "args": { + "External id": 294329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367935465.003, "dur": 1.044, + "args": { + "External id": 294330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367935482.614, "dur": 184.670, + "args": { + "External id": 294331,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367935497.356, "dur": 164.790, + "args": { + "External id": 294332,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367935515.381, "dur": 12.442, + "args": { + "External id": 294333,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367935531.537, "dur": 65.311, + "args": { + "External id": 294334,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367935534.165, "dur": 62.411, + "args": { + "External id": 294335,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935538.304, "dur": 6.475, + "args": { + "External id": 294336,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367935546.228, "dur": 49.746, + "args": { + "External id": 294337,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2070547, "tid": 2107622, + "ts": 5333367935780.130, "dur": 541.317, + "args": { + "External id": 294338,"Record function id": 0, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070547, "tid": 2107622, + "ts": 5333367935796.409, "dur": 512.607, + "args": { + "External id": 294339,"Record function id": 0, "Ev Idx": 962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367935850.340, "dur": 5.648, + "args": { + "External id": 294340,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367935870.766, "dur": 28.770, + "args": { + "External id": 294341,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935874.808, "dur": 1.598, + "args": { + "External id": 294342,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935878.165, "dur": 1.426, + "args": { + "External id": 294343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935880.836, "dur": 2.645, + "args": { + "External id": 294344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935884.750, "dur": 0.587, + "args": { + "External id": 294345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935887.147, "dur": 0.249, + "args": { + "External id": 294346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935888.686, "dur": 0.430, + "args": { + "External id": 294347,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935890.463, "dur": 0.473, + "args": { + "External id": 294348,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935892.780, "dur": 0.463, + "args": { + "External id": 294349,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935894.336, "dur": 0.600, + "args": { + "External id": 294350,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367935908.644, "dur": 29.877, + "args": { + "External id": 294351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367935967.631, "dur": 93.259, + "args": { + "External id": 294352,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367935977.548, "dur": 3.304, + "args": { + "External id": 294353,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367935985.783, "dur": 11.998, + "args": { + "External id": 294354,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367935990.164, "dur": 7.201, + "args": { + "External id": 294355,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367935993.516, "dur": 2.628, + "args": { + "External id": 294356,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367936004.178, "dur": 23.352, + "args": { + "External id": 294357,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936006.150, "dur": 0.580, + "args": { + "External id": 294358,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936008.830, "dur": 0.364, + "args": { + "External id": 294359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936010.228, "dur": 0.545, + "args": { + "External id": 294360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936012.299, "dur": 1.126, + "args": { + "External id": 294361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936014.684, "dur": 0.337, + "args": { + "External id": 294362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936015.855, "dur": 0.464, + "args": { + "External id": 294363,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936018.831, "dur": 0.411, + "args": { + "External id": 294364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936020.479, "dur": 2.213, + "args": { + "External id": 294365,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367936023.598, "dur": 0.364, + "args": { + "External id": 294366,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367936036.016, "dur": 17.856, + "args": { + "External id": 294367,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367936101.744, "dur": 135.005, + "args": { + "External id": 294368,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367936125.216, "dur": 108.059, + "args": { + "External id": 294369,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 992, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367936135.369, "dur": 92.217, + "args": { + "External id": 294370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367936252.649, "dur": 2.026, + "args": { + "External id": 294371,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 994, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367936336.321, "dur": 1613.526, + "args": { + "External id": 294372,"Sequence number": 1209212, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 995 + } + }, + { + "ph": "f", "id": 20, "pid": 2070547, "tid": 2107622, "ts": 5333367936336.321, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367936438.371, "dur": 101.384, + "args": { + "External id": 294373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367936576.874, "dur": 37.094, + "args": { + "External id": 294374,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367936674.270, "dur": 57.076, + "args": { + "External id": 294375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367936741.509, "dur": 33.776, + "args": { + "External id": 294376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367936781.630, "dur": 45.747, + "args": { + "External id": 294377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367936835.038, "dur": 26.894, + "args": { + "External id": 294378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367936871.864, "dur": 42.289, + "args": { + "External id": 294379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367936939.780, "dur": 22.922, + "args": { + "External id": 294380,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367936978.558, "dur": 26.684, + "args": { + "External id": 294381,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367937023.928, "dur": 17.938, + "args": { + "External id": 294382,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367937053.510, "dur": 13.402, + "args": { + "External id": 294383,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937075.594, "dur": 27.584, + "args": { + "External id": 294384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937106.245, "dur": 32.734, + "args": { + "External id": 294385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367937163.952, "dur": 196.344, + "args": { + "External id": 294386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367937261.153, "dur": 7.122, + "args": { + "External id": 294387,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367937270.594, "dur": 3.658, + "args": { + "External id": 294388,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367937391.300, "dur": 22.996, + "args": { + "External id": 294389,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367937427.521, "dur": 13.230, + "args": { + "External id": 294390,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937447.970, "dur": 40.440, + "args": { + "External id": 294391,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937493.233, "dur": 34.019, + "args": { + "External id": 294392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937533.291, "dur": 19.745, + "args": { + "External id": 294393,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937559.168, "dur": 29.464, + "args": { + "External id": 294394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937595.090, "dur": 19.984, + "args": { + "External id": 294395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367937659.521, "dur": 35.256, + "args": { + "External id": 294396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367937714.533, "dur": 21.320, + "args": { + "External id": 294397,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367937754.260, "dur": 22.406, + "args": { + "External id": 294398,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367937789.633, "dur": 33.237, + "args": { + "External id": 294399,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367937845.648, "dur": 15.841, + "args": { + "External id": 294400,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367937900.012, "dur": 15.501, + "args": { + "External id": 294401,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938000.864, "dur": 14.217, + "args": { + "External id": 294402,"Record function id": 0, "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938004.208, "dur": 9.877, + "args": { + "External id": 294403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938008.278, "dur": 4.984, + "args": { + "External id": 294404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938009.441, "dur": 3.724, + "args": { + "External id": 294405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938019.011, "dur": 3.724, + "args": { + "External id": 294406,"Record function id": 0, "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938020.286, "dur": 2.030, + "args": { + "External id": 294407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938020.879, "dur": 0.991, + "args": { + "External id": 294408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938021.173, "dur": 0.609, + "args": { + "External id": 294409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938026.030, "dur": 3.586, + "args": { + "External id": 294410,"Record function id": 0, "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938027.199, "dur": 2.009, + "args": { + "External id": 294411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938027.704, "dur": 1.047, + "args": { + "External id": 294412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938028.060, "dur": 0.611, + "args": { + "External id": 294413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938032.776, "dur": 3.928, + "args": { + "External id": 294414,"Record function id": 0, "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938033.845, "dur": 2.463, + "args": { + "External id": 294415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938034.856, "dur": 1.047, + "args": { + "External id": 294416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938035.144, "dur": 0.693, + "args": { + "External id": 294417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938040.111, "dur": 3.396, + "args": { + "External id": 294418,"Record function id": 0, "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938041.172, "dur": 1.930, + "args": { + "External id": 294419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938041.617, "dur": 1.086, + "args": { + "External id": 294420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938042.037, "dur": 0.597, + "args": { + "External id": 294421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938046.847, "dur": 3.671, + "args": { + "External id": 294422,"Record function id": 0, "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938047.862, "dur": 2.245, + "args": { + "External id": 294423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938048.317, "dur": 1.349, + "args": { + "External id": 294424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938049.121, "dur": 0.471, + "args": { + "External id": 294425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938053.730, "dur": 5.780, + "args": { + "External id": 294426,"Record function id": 0, "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938054.836, "dur": 4.227, + "args": { + "External id": 294427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938055.310, "dur": 3.353, + "args": { + "External id": 294428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938056.179, "dur": 2.416, + "args": { + "External id": 294429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938062.570, "dur": 4.067, + "args": { + "External id": 294430,"Record function id": 0, "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938063.732, "dur": 2.493, + "args": { + "External id": 294431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938064.188, "dur": 1.620, + "args": { + "External id": 294432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938064.750, "dur": 0.982, + "args": { + "External id": 294433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938069.752, "dur": 3.706, + "args": { + "External id": 294434,"Record function id": 0, "Ev Idx": 1057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367938070.755, "dur": 2.284, + "args": { + "External id": 294435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938071.188, "dur": 1.435, + "args": { + "External id": 294436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367938071.920, "dur": 0.639, + "args": { + "External id": 294437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367938078.178, "dur": 37488.323, + "args": { + "External id": 294438,"Record function id": 0, "Sequence number": 1209211, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367938079.423, "dur": 37478.749, + "args": { + "External id": 294439,"Sequence number": 1209211, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 21, "pid": 2070547, "tid": 2107622, "ts": 5333367938079.423, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2070547, "tid": 2107622, + "ts": 5333367938110.072, "dur": 36.538, + "args": { + "External id": 294440,"Record function id": 0, "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2070547, "tid": 2107622, + "ts": 5333367938153.730, "dur": 75.322, + "args": { + "External id": 294441,"Record function id": 0, "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2070547, "tid": 2107622, + "ts": 5333367938236.143, "dur": 37314.407, + "args": { + "External id": 294442,"Record function id": 0, "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367938324.358, "dur": 6.981, + "args": { + "External id": 294443,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367938341.386, "dur": 4.761, + "args": { + "External id": 294444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367938361.242, "dur": 36383.873, + "args": { + "External id": 294445,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367938373.606, "dur": 36362.135, + "args": { + "External id": 294446,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367938463.052, "dur": 14.329, + "args": { + "External id": 294447,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367938483.502, "dur": 36216.677, + "args": { + "External id": 294448,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367938485.844, "dur": 36213.556, + "args": { + "External id": 294449,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367938490.482, "dur": 4.919, + "args": { + "External id": 294450,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367938497.047, "dur": 36198.631, + "args": { + "External id": 294451,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367974835.963, "dur": 9.303, + "args": { + "External id": 294452,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367974838.733, "dur": 6.170, + "args": { + "External id": 294453,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367974870.889, "dur": 397.176, + "args": { + "External id": 294454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367974896.691, "dur": 366.417, + "args": { + "External id": 294455,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1078, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367974908.405, "dur": 348.300, + "args": { + "External id": 294456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367975287.993, "dur": 1.986, + "args": { + "External id": 294457,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1080, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975352.415, "dur": 8.313, + "args": { + "External id": 294458,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975403.918, "dur": 1.329, + "args": { + "External id": 294459,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975420.617, "dur": 1.075, + "args": { + "External id": 294460,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975432.636, "dur": 0.709, + "args": { + "External id": 294461,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975443.816, "dur": 2.739, + "args": { + "External id": 294462,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975456.622, "dur": 1.012, + "args": { + "External id": 294463,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975467.654, "dur": 0.766, + "args": { + "External id": 294464,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975478.421, "dur": 1.441, + "args": { + "External id": 294465,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975489.114, "dur": 2.646, + "args": { + "External id": 294466,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367975580.673, "dur": 2672.635, + "args": { + "External id": 294467,"Record function id": 0, "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2070547, "tid": 2107622, + "ts": 5333367975599.945, "dur": 996.727, + "args": { + "External id": 294468,"Record function id": 0, "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070547, "tid": 2107622, + "ts": 5333367975616.002, "dur": 357.063, + "args": { + "External id": 294469,"Record function id": 0, "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975733.726, "dur": 5.077, + "args": { + "External id": 294470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975742.032, "dur": 0.630, + "args": { + "External id": 294471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975744.499, "dur": 0.865, + "args": { + "External id": 294472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975746.744, "dur": 0.718, + "args": { + "External id": 294473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975748.782, "dur": 0.624, + "args": { + "External id": 294474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975750.871, "dur": 0.494, + "args": { + "External id": 294475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975752.898, "dur": 2.969, + "args": { + "External id": 294476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975757.316, "dur": 0.690, + "args": { + "External id": 294477,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975759.231, "dur": 0.580, + "args": { + "External id": 294478,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367975761.048, "dur": 0.681, + "args": { + "External id": 294479,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367975779.547, "dur": 163.267, + "args": { + "External id": 294480,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367975794.373, "dur": 143.701, + "args": { + "External id": 294481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367975822.198, "dur": 14.622, + "args": { + "External id": 294482,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367975841.311, "dur": 68.915, + "args": { + "External id": 294483,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367975843.947, "dur": 65.933, + "args": { + "External id": 294484,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367975848.275, "dur": 6.524, + "args": { + "External id": 294485,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367975856.568, "dur": 52.686, + "args": { + "External id": 294486,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2070547, "tid": 2107622, + "ts": 5333367976049.249, "dur": 539.398, + "args": { + "External id": 294487,"Record function id": 0, "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070547, "tid": 2107622, + "ts": 5333367976066.823, "dur": 509.437, + "args": { + "External id": 294488,"Record function id": 0, "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367976119.401, "dur": 4.780, + "args": { + "External id": 294489,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367976139.487, "dur": 22.968, + "args": { + "External id": 294490,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976143.542, "dur": 1.646, + "args": { + "External id": 294491,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976146.699, "dur": 2.580, + "args": { + "External id": 294492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976150.308, "dur": 0.353, + "args": { + "External id": 294493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976151.625, "dur": 0.280, + "args": { + "External id": 294494,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976153.476, "dur": 0.439, + "args": { + "External id": 294495,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976154.572, "dur": 0.329, + "args": { + "External id": 294496,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976155.441, "dur": 0.360, + "args": { + "External id": 294497,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976157.902, "dur": 0.189, + "args": { + "External id": 294498,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976158.670, "dur": 0.343, + "args": { + "External id": 294499,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367976187.876, "dur": 33.693, + "args": { + "External id": 294500,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333367976254.947, "dur": 97.144, + "args": { + "External id": 294501,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367976265.123, "dur": 5.930, + "args": { + "External id": 294502,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333367976275.724, "dur": 9.473, + "args": { + "External id": 294503,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333367976279.764, "dur": 5.014, + "args": { + "External id": 294504,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976282.669, "dur": 0.597, + "args": { + "External id": 294505,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333367976291.682, "dur": 21.372, + "args": { + "External id": 294506,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976293.161, "dur": 0.466, + "args": { + "External id": 294507,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976298.519, "dur": 0.453, + "args": { + "External id": 294508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976299.706, "dur": 0.373, + "args": { + "External id": 294509,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976300.859, "dur": 0.712, + "args": { + "External id": 294510,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976302.182, "dur": 0.180, + "args": { + "External id": 294511,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976303.084, "dur": 0.314, + "args": { + "External id": 294512,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976305.614, "dur": 1.999, + "args": { + "External id": 294513,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976308.392, "dur": 0.282, + "args": { + "External id": 294514,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367976309.744, "dur": 0.330, + "args": { + "External id": 294515,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367976325.540, "dur": 19.444, + "args": { + "External id": 294516,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333367976399.000, "dur": 114.012, + "args": { + "External id": 294517,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367976424.923, "dur": 84.632, + "args": { + "External id": 294518,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1141, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333367976434.053, "dur": 71.374, + "args": { + "External id": 294519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333367976528.698, "dur": 1.735, + "args": { + "External id": 294520,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1143, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367976603.164, "dur": 1621.979, + "args": { + "External id": 294521,"Sequence number": 1209210, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1144 + } + }, + { + "ph": "f", "id": 22, "pid": 2070547, "tid": 2107622, "ts": 5333367976603.164, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367976750.176, "dur": 103.954, + "args": { + "External id": 294522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367976892.994, "dur": 36.252, + "args": { + "External id": 294523,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333367976946.564, "dur": 48.153, + "args": { + "External id": 294524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977004.066, "dur": 31.721, + "args": { + "External id": 294525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977041.977, "dur": 44.051, + "args": { + "External id": 294526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977092.932, "dur": 26.732, + "args": { + "External id": 294527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977129.718, "dur": 57.141, + "args": { + "External id": 294528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367977213.467, "dur": 26.343, + "args": { + "External id": 294529,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333367977265.363, "dur": 25.564, + "args": { + "External id": 294530,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367977311.109, "dur": 17.810, + "args": { + "External id": 294531,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367977339.983, "dur": 12.692, + "args": { + "External id": 294532,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977362.270, "dur": 31.159, + "args": { + "External id": 294533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977396.777, "dur": 32.902, + "args": { + "External id": 294534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333367977455.721, "dur": 201.176, + "args": { + "External id": 294535,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367977531.135, "dur": 5.406, + "args": { + "External id": 294536,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367977538.520, "dur": 3.868, + "args": { + "External id": 294537,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367977692.466, "dur": 27.193, + "args": { + "External id": 294538,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333367977745.281, "dur": 14.408, + "args": { + "External id": 294539,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977769.391, "dur": 41.673, + "args": { + "External id": 294540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977816.126, "dur": 34.645, + "args": { + "External id": 294541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977857.844, "dur": 19.989, + "args": { + "External id": 294542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977884.268, "dur": 29.299, + "args": { + "External id": 294543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977918.735, "dur": 23.700, + "args": { + "External id": 294544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333367977947.671, "dur": 28.072, + "args": { + "External id": 294545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333367977996.805, "dur": 20.455, + "args": { + "External id": 294546,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367978032.499, "dur": 39.095, + "args": { + "External id": 294547,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333367978092.933, "dur": 22.540, + "args": { + "External id": 294548,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333367978130.492, "dur": 15.641, + "args": { + "External id": 294549,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333367978157.414, "dur": 34.656, + "args": { + "External id": 294550,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978275.226, "dur": 14.115, + "args": { + "External id": 294551,"Record function id": 0, "Ev Idx": 1174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978278.096, "dur": 10.212, + "args": { + "External id": 294552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978282.196, "dur": 5.311, + "args": { + "External id": 294553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978283.506, "dur": 3.918, + "args": { + "External id": 294554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978293.128, "dur": 4.357, + "args": { + "External id": 294555,"Record function id": 0, "Ev Idx": 1178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978294.732, "dur": 2.312, + "args": { + "External id": 294556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978295.242, "dur": 1.300, + "args": { + "External id": 294557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978295.729, "dur": 0.728, + "args": { + "External id": 294558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978300.765, "dur": 3.693, + "args": { + "External id": 294559,"Record function id": 0, "Ev Idx": 1182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978301.995, "dur": 2.052, + "args": { + "External id": 294560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978302.473, "dur": 1.147, + "args": { + "External id": 294561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978302.843, "dur": 0.701, + "args": { + "External id": 294562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978307.689, "dur": 3.474, + "args": { + "External id": 294563,"Record function id": 0, "Ev Idx": 1186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978308.819, "dur": 1.928, + "args": { + "External id": 294564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978309.418, "dur": 0.914, + "args": { + "External id": 294565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978309.701, "dur": 0.556, + "args": { + "External id": 294566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978314.348, "dur": 3.556, + "args": { + "External id": 294567,"Record function id": 0, "Ev Idx": 1190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978315.509, "dur": 1.989, + "args": { + "External id": 294568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978315.985, "dur": 1.096, + "args": { + "External id": 294569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978316.378, "dur": 0.629, + "args": { + "External id": 294570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978321.050, "dur": 3.788, + "args": { + "External id": 294571,"Record function id": 0, "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978322.192, "dur": 2.244, + "args": { + "External id": 294572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978322.642, "dur": 1.384, + "args": { + "External id": 294573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978323.378, "dur": 0.574, + "args": { + "External id": 294574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978328.113, "dur": 8.189, + "args": { + "External id": 294575,"Record function id": 0, "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978329.182, "dur": 6.666, + "args": { + "External id": 294576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978331.765, "dur": 3.696, + "args": { + "External id": 294577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978332.691, "dur": 2.709, + "args": { + "External id": 294578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978339.426, "dur": 4.091, + "args": { + "External id": 294579,"Record function id": 0, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978340.704, "dur": 2.401, + "args": { + "External id": 294580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978341.158, "dur": 1.534, + "args": { + "External id": 294581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978341.806, "dur": 0.812, + "args": { + "External id": 294582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978347.812, "dur": 3.693, + "args": { + "External id": 294583,"Record function id": 0, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333367978349.005, "dur": 2.101, + "args": { + "External id": 294584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978349.426, "dur": 1.294, + "args": { + "External id": 294585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333367978349.960, "dur": 0.694, + "args": { + "External id": 294586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367978355.264, "dur": 36542.841, + "args": { + "External id": 294587,"Record function id": 0, "Sequence number": 1209209, "Fwd thread id": 1, "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333367978356.451, "dur": 36532.607, + "args": { + "External id": 294588,"Sequence number": 1209209, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1211 + } + }, + { + "ph": "f", "id": 23, "pid": 2070547, "tid": 2107622, "ts": 5333367978356.451, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2070547, "tid": 2107622, + "ts": 5333367978386.793, "dur": 35.855, + "args": { + "External id": 294589,"Record function id": 0, "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2070547, "tid": 2107622, + "ts": 5333367978429.609, "dur": 57.700, + "args": { + "External id": 294590,"Record function id": 0, "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2070547, "tid": 2107622, + "ts": 5333367978493.008, "dur": 36388.278, + "args": { + "External id": 294591,"Record function id": 0, "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367978579.014, "dur": 7.302, + "args": { + "External id": 294592,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333367978595.572, "dur": 4.496, + "args": { + "External id": 294593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367978614.148, "dur": 35401.392, + "args": { + "External id": 294594,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333367978665.256, "dur": 35341.208, + "args": { + "External id": 294595,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333367978739.934, "dur": 15.511, + "args": { + "External id": 294596,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333367978762.121, "dur": 35205.882, + "args": { + "External id": 294597,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333367978764.522, "dur": 35202.770, + "args": { + "External id": 294598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333367978768.554, "dur": 6.832, + "args": { + "External id": 294599,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333367978777.276, "dur": 35186.216, + "args": { + "External id": 294600,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368014106.025, "dur": 9.199, + "args": { + "External id": 294601,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368014108.771, "dur": 6.138, + "args": { + "External id": 294602,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368014144.984, "dur": 410.814, + "args": { + "External id": 294603,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368014184.074, "dur": 366.369, + "args": { + "External id": 294604,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1227, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368014196.199, "dur": 349.088, + "args": { + "External id": 294605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368014576.159, "dur": 2.156, + "args": { + "External id": 294606,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1229, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014672.041, "dur": 8.199, + "args": { + "External id": 294607,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014724.984, "dur": 1.121, + "args": { + "External id": 294608,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014744.819, "dur": 1.162, + "args": { + "External id": 294609,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014757.178, "dur": 0.720, + "args": { + "External id": 294610,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014769.954, "dur": 2.713, + "args": { + "External id": 294611,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014781.972, "dur": 0.751, + "args": { + "External id": 294612,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014792.682, "dur": 0.846, + "args": { + "External id": 294613,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014806.996, "dur": 1.816, + "args": { + "External id": 294614,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368014818.885, "dur": 2.206, + "args": { + "External id": 294615,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368014913.058, "dur": 2577.932, + "args": { + "External id": 294616,"Record function id": 0, "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2070547, "tid": 2107622, + "ts": 5333368014931.652, "dur": 972.489, + "args": { + "External id": 294617,"Record function id": 0, "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070547, "tid": 2107622, + "ts": 5333368014944.963, "dur": 311.929, + "args": { + "External id": 294618,"Record function id": 0, "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015016.070, "dur": 4.012, + "args": { + "External id": 294619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015023.483, "dur": 0.742, + "args": { + "External id": 294620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015026.395, "dur": 0.785, + "args": { + "External id": 294621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015028.818, "dur": 0.755, + "args": { + "External id": 294622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015031.651, "dur": 0.466, + "args": { + "External id": 294623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015033.810, "dur": 0.511, + "args": { + "External id": 294624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015036.007, "dur": 3.197, + "args": { + "External id": 294625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015040.901, "dur": 0.595, + "args": { + "External id": 294626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015043.154, "dur": 0.597, + "args": { + "External id": 294627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368015045.301, "dur": 0.701, + "args": { + "External id": 294628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368015063.533, "dur": 164.058, + "args": { + "External id": 294629,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368015079.121, "dur": 143.756, + "args": { + "External id": 294630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368015094.191, "dur": 12.342, + "args": { + "External id": 294631,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368015110.377, "dur": 81.770, + "args": { + "External id": 294632,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368015113.072, "dur": 78.593, + "args": { + "External id": 294633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015117.591, "dur": 5.620, + "args": { + "External id": 294634,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368015124.760, "dur": 65.897, + "args": { + "External id": 294635,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2070547, "tid": 2107622, + "ts": 5333368015329.807, "dur": 566.144, + "args": { + "External id": 294636,"Record function id": 0, "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070547, "tid": 2107622, + "ts": 5333368015346.952, "dur": 536.389, + "args": { + "External id": 294637,"Record function id": 0, "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368015397.966, "dur": 5.258, + "args": { + "External id": 294638,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368015418.733, "dur": 30.593, + "args": { + "External id": 294639,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015423.381, "dur": 1.237, + "args": { + "External id": 294640,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015426.576, "dur": 3.001, + "args": { + "External id": 294641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015431.158, "dur": 0.413, + "args": { + "External id": 294642,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015433.305, "dur": 0.392, + "args": { + "External id": 294643,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015436.104, "dur": 0.411, + "args": { + "External id": 294644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015437.827, "dur": 0.463, + "args": { + "External id": 294645,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015439.919, "dur": 0.347, + "args": { + "External id": 294646,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015443.050, "dur": 0.427, + "args": { + "External id": 294647,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015445.389, "dur": 0.325, + "args": { + "External id": 294648,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368015459.030, "dur": 31.104, + "args": { + "External id": 294649,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368015519.081, "dur": 138.063, + "args": { + "External id": 294650,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368015528.726, "dur": 5.180, + "args": { + "External id": 294651,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368015538.671, "dur": 9.478, + "args": { + "External id": 294652,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368015542.582, "dur": 5.181, + "args": { + "External id": 294653,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015546.004, "dur": 0.473, + "args": { + "External id": 294654,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368015555.090, "dur": 26.243, + "args": { + "External id": 294655,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015557.924, "dur": 0.364, + "args": { + "External id": 294656,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015560.916, "dur": 0.361, + "args": { + "External id": 294657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015562.754, "dur": 0.421, + "args": { + "External id": 294658,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015564.843, "dur": 1.004, + "args": { + "External id": 294659,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015567.551, "dur": 0.356, + "args": { + "External id": 294660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015569.465, "dur": 0.345, + "args": { + "External id": 294661,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015571.862, "dur": 2.371, + "args": { + "External id": 294662,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015575.929, "dur": 0.336, + "args": { + "External id": 294663,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368015577.625, "dur": 0.389, + "args": { + "External id": 294664,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368015591.878, "dur": 20.911, + "args": { + "External id": 294665,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368015704.287, "dur": 115.734, + "args": { + "External id": 294666,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368015731.031, "dur": 85.851, + "args": { + "External id": 294667,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1290, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368015739.867, "dur": 72.807, + "args": { + "External id": 294668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368015834.370, "dur": 1.657, + "args": { + "External id": 294669,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1292, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368015910.566, "dur": 1558.964, + "args": { + "External id": 294670,"Sequence number": 1209208, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1293 + } + }, + { + "ph": "f", "id": 24, "pid": 2070547, "tid": 2107622, "ts": 5333368015910.566, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016013.459, "dur": 101.926, + "args": { + "External id": 294671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368016152.758, "dur": 54.706, + "args": { + "External id": 294672,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016227.859, "dur": 52.203, + "args": { + "External id": 294673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016289.740, "dur": 32.668, + "args": { + "External id": 294674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016328.504, "dur": 44.205, + "args": { + "External id": 294675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016380.070, "dur": 26.504, + "args": { + "External id": 294676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016418.698, "dur": 41.325, + "args": { + "External id": 294677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368016483.815, "dur": 23.980, + "args": { + "External id": 294678,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368016524.684, "dur": 30.825, + "args": { + "External id": 294679,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368016573.275, "dur": 20.830, + "args": { + "External id": 294680,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368016605.907, "dur": 50.950, + "args": { + "External id": 294681,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016669.722, "dur": 33.589, + "args": { + "External id": 294682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368016706.919, "dur": 33.037, + "args": { + "External id": 294683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368016765.492, "dur": 168.461, + "args": { + "External id": 294684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368016843.368, "dur": 6.357, + "args": { + "External id": 294685,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368016851.813, "dur": 3.685, + "args": { + "External id": 294686,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368016964.356, "dur": 26.598, + "args": { + "External id": 294687,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368017002.548, "dur": 13.567, + "args": { + "External id": 294688,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368017022.899, "dur": 33.551, + "args": { + "External id": 294689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368017061.707, "dur": 32.313, + "args": { + "External id": 294690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368017100.071, "dur": 25.966, + "args": { + "External id": 294691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368017132.702, "dur": 28.628, + "args": { + "External id": 294692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368017188.556, "dur": 26.470, + "args": { + "External id": 294693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368017221.962, "dur": 30.004, + "args": { + "External id": 294694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368017270.923, "dur": 21.117, + "args": { + "External id": 294695,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368017308.634, "dur": 22.227, + "args": { + "External id": 294696,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368017345.942, "dur": 14.928, + "args": { + "External id": 294697,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368017374.489, "dur": 13.484, + "args": { + "External id": 294698,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368017417.393, "dur": 23.530, + "args": { + "External id": 294699,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017512.628, "dur": 17.529, + "args": { + "External id": 294700,"Record function id": 0, "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017515.779, "dur": 13.418, + "args": { + "External id": 294701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017519.718, "dur": 8.557, + "args": { + "External id": 294702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017524.173, "dur": 3.979, + "args": { + "External id": 294703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017533.972, "dur": 4.265, + "args": { + "External id": 294704,"Record function id": 0, "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017535.372, "dur": 2.432, + "args": { + "External id": 294705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017536.039, "dur": 1.276, + "args": { + "External id": 294706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017536.417, "dur": 0.810, + "args": { + "External id": 294707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017541.501, "dur": 4.221, + "args": { + "External id": 294708,"Record function id": 0, "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017543.112, "dur": 2.146, + "args": { + "External id": 294709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017543.709, "dur": 1.094, + "args": { + "External id": 294710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017544.065, "dur": 0.656, + "args": { + "External id": 294711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017548.925, "dur": 4.076, + "args": { + "External id": 294712,"Record function id": 0, "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017550.390, "dur": 2.177, + "args": { + "External id": 294713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017550.989, "dur": 1.131, + "args": { + "External id": 294714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017551.342, "dur": 0.690, + "args": { + "External id": 294715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017556.409, "dur": 5.926, + "args": { + "External id": 294716,"Record function id": 0, "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017557.635, "dur": 4.263, + "args": { + "External id": 294717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017558.231, "dur": 3.254, + "args": { + "External id": 294718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017558.834, "dur": 2.579, + "args": { + "External id": 294719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017565.397, "dur": 4.418, + "args": { + "External id": 294720,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017566.955, "dur": 2.453, + "args": { + "External id": 294721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017567.432, "dur": 1.565, + "args": { + "External id": 294722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017568.056, "dur": 0.874, + "args": { + "External id": 294723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017573.283, "dur": 4.140, + "args": { + "External id": 294724,"Record function id": 0, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017574.704, "dur": 2.257, + "args": { + "External id": 294725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017575.241, "dur": 1.311, + "args": { + "External id": 294726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017575.810, "dur": 0.671, + "args": { + "External id": 294727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017580.549, "dur": 6.893, + "args": { + "External id": 294728,"Record function id": 0, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017584.848, "dur": 2.177, + "args": { + "External id": 294729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017585.326, "dur": 1.270, + "args": { + "External id": 294730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017585.923, "dur": 0.600, + "args": { + "External id": 294731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017590.529, "dur": 3.880, + "args": { + "External id": 294732,"Record function id": 0, "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368017591.733, "dur": 2.248, + "args": { + "External id": 294733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017592.249, "dur": 1.289, + "args": { + "External id": 294734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368017592.852, "dur": 0.617, + "args": { + "External id": 294735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368017598.439, "dur": 37242.745, + "args": { + "External id": 294736,"Record function id": 0, "Sequence number": 1209207, "Fwd thread id": 1, "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368017600.075, "dur": 37232.342, + "args": { + "External id": 294737,"Sequence number": 1209207, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1360 + } + }, + { + "ph": "f", "id": 25, "pid": 2070547, "tid": 2107622, "ts": 5333368017600.075, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2070547, "tid": 2107622, + "ts": 5333368017663.381, "dur": 38.233, + "args": { + "External id": 294738,"Record function id": 0, "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2070547, "tid": 2107622, + "ts": 5333368017710.235, "dur": 59.975, + "args": { + "External id": 294739,"Record function id": 0, "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2070547, "tid": 2107622, + "ts": 5333368017776.806, "dur": 37048.008, + "args": { + "External id": 294740,"Record function id": 0, "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368017865.392, "dur": 7.353, + "args": { + "External id": 294741,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368017882.497, "dur": 4.870, + "args": { + "External id": 294742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368017901.805, "dur": 36072.629, + "args": { + "External id": 294743,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368017915.314, "dur": 36050.181, + "args": { + "External id": 294744,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368017958.800, "dur": 13.769, + "args": { + "External id": 294745,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368017978.977, "dur": 35947.638, + "args": { + "External id": 294746,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368017981.669, "dur": 35944.064, + "args": { + "External id": 294747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368017985.353, "dur": 7.429, + "args": { + "External id": 294748,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368017994.415, "dur": 35927.948, + "args": { + "External id": 294749,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368054061.137, "dur": 9.371, + "args": { + "External id": 294750,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368054063.859, "dur": 6.224, + "args": { + "External id": 294751,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368054097.985, "dur": 392.682, + "args": { + "External id": 294752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368054126.069, "dur": 359.464, + "args": { + "External id": 294753,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1376, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368054138.484, "dur": 341.335, + "args": { + "External id": 294754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368054513.007, "dur": 2.279, + "args": { + "External id": 294755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1378, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054575.761, "dur": 6.490, + "args": { + "External id": 294756,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054669.466, "dur": 2.265, + "args": { + "External id": 294757,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054690.539, "dur": 1.462, + "args": { + "External id": 294758,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054702.856, "dur": 3.222, + "args": { + "External id": 294759,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054717.867, "dur": 0.984, + "args": { + "External id": 294760,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054728.339, "dur": 0.810, + "args": { + "External id": 294761,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054739.053, "dur": 0.731, + "args": { + "External id": 294762,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054749.396, "dur": 3.588, + "args": { + "External id": 294763,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368054762.712, "dur": 0.704, + "args": { + "External id": 294764,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368054855.388, "dur": 2599.055, + "args": { + "External id": 294765,"Record function id": 0, "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2070547, "tid": 2107622, + "ts": 5333368054874.255, "dur": 1010.079, + "args": { + "External id": 294766,"Record function id": 0, "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070547, "tid": 2107622, + "ts": 5333368054890.047, "dur": 320.252, + "args": { + "External id": 294767,"Record function id": 0, "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054964.178, "dur": 3.927, + "args": { + "External id": 294768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054971.429, "dur": 0.884, + "args": { + "External id": 294769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054974.522, "dur": 0.600, + "args": { + "External id": 294770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054977.074, "dur": 0.615, + "args": { + "External id": 294771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054979.400, "dur": 2.507, + "args": { + "External id": 294772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054983.758, "dur": 1.011, + "args": { + "External id": 294773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054986.429, "dur": 1.516, + "args": { + "External id": 294774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054989.428, "dur": 0.652, + "args": { + "External id": 294775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054991.894, "dur": 0.710, + "args": { + "External id": 294776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368054994.436, "dur": 0.705, + "args": { + "External id": 294777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368055013.048, "dur": 145.845, + "args": { + "External id": 294778,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368055028.486, "dur": 126.588, + "args": { + "External id": 294779,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368055045.783, "dur": 12.332, + "args": { + "External id": 294780,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368055062.045, "dur": 65.253, + "args": { + "External id": 294781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368055066.076, "dur": 60.902, + "args": { + "External id": 294782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055069.202, "dur": 5.882, + "args": { + "External id": 294783,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368055076.801, "dur": 49.654, + "args": { + "External id": 294784,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2070547, "tid": 2107622, + "ts": 5333368055288.111, "dur": 588.340, + "args": { + "External id": 294785,"Record function id": 0, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070547, "tid": 2107622, + "ts": 5333368055304.762, "dur": 558.347, + "args": { + "External id": 294786,"Record function id": 0, "Ev Idx": 1409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368055358.517, "dur": 7.448, + "args": { + "External id": 294787,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368055380.756, "dur": 27.325, + "args": { + "External id": 294788,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055385.263, "dur": 1.586, + "args": { + "External id": 294789,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055389.940, "dur": 0.370, + "args": { + "External id": 294790,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055391.460, "dur": 0.319, + "args": { + "External id": 294791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055392.725, "dur": 0.367, + "args": { + "External id": 294792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055395.336, "dur": 0.406, + "args": { + "External id": 294793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055396.638, "dur": 0.398, + "args": { + "External id": 294794,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055397.834, "dur": 1.453, + "args": { + "External id": 294795,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055400.366, "dur": 2.134, + "args": { + "External id": 294796,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055403.916, "dur": 0.315, + "args": { + "External id": 294797,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368055432.286, "dur": 34.151, + "args": { + "External id": 294798,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368055495.860, "dur": 96.894, + "args": { + "External id": 294799,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368055505.398, "dur": 4.248, + "args": { + "External id": 294800,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368055514.268, "dur": 9.416, + "args": { + "External id": 294801,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368055518.210, "dur": 5.078, + "args": { + "External id": 294802,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055521.588, "dur": 0.583, + "args": { + "External id": 294803,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368055530.323, "dur": 24.151, + "args": { + "External id": 294804,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055532.559, "dur": 0.320, + "args": { + "External id": 294805,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055534.300, "dur": 1.880, + "args": { + "External id": 294806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055537.686, "dur": 0.332, + "args": { + "External id": 294807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055539.316, "dur": 0.343, + "args": { + "External id": 294808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055542.655, "dur": 2.247, + "args": { + "External id": 294809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055545.887, "dur": 0.205, + "args": { + "External id": 294810,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055546.930, "dur": 0.376, + "args": { + "External id": 294811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055549.204, "dur": 0.395, + "args": { + "External id": 294812,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368055550.696, "dur": 0.339, + "args": { + "External id": 294813,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368055565.906, "dur": 19.468, + "args": { + "External id": 294814,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368055685.249, "dur": 113.999, + "args": { + "External id": 294815,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368055708.067, "dur": 87.786, + "args": { + "External id": 294816,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1439, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368055717.679, "dur": 74.117, + "args": { + "External id": 294817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368055811.289, "dur": 1.624, + "args": { + "External id": 294818,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1441, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368055891.205, "dur": 1536.227, + "args": { + "External id": 294819,"Sequence number": 1209206, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1442 + } + }, + { + "ph": "f", "id": 26, "pid": 2070547, "tid": 2107622, "ts": 5333368055891.205, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368055994.894, "dur": 102.480, + "args": { + "External id": 294820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368056137.432, "dur": 54.456, + "args": { + "External id": 294821,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056210.295, "dur": 53.125, + "args": { + "External id": 294822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056273.567, "dur": 31.124, + "args": { + "External id": 294823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056313.148, "dur": 44.801, + "args": { + "External id": 294824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056364.159, "dur": 27.267, + "args": { + "External id": 294825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056398.401, "dur": 40.531, + "args": { + "External id": 294826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368056459.874, "dur": 23.351, + "args": { + "External id": 294827,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368056498.522, "dur": 26.839, + "args": { + "External id": 294828,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368056545.297, "dur": 17.758, + "args": { + "External id": 294829,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368056573.537, "dur": 14.102, + "args": { + "External id": 294830,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056595.548, "dur": 63.662, + "args": { + "External id": 294831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056665.174, "dur": 36.653, + "args": { + "External id": 294832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368056731.973, "dur": 161.672, + "args": { + "External id": 294833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368056806.895, "dur": 5.216, + "args": { + "External id": 294834,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368056813.962, "dur": 2.466, + "args": { + "External id": 294835,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368056922.866, "dur": 26.672, + "args": { + "External id": 294836,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368056959.384, "dur": 14.977, + "args": { + "External id": 294837,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368056981.347, "dur": 33.538, + "args": { + "External id": 294838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368057020.334, "dur": 32.788, + "args": { + "External id": 294839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368057060.304, "dur": 21.529, + "args": { + "External id": 294840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368057087.500, "dur": 28.885, + "args": { + "External id": 294841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368057121.459, "dur": 21.353, + "args": { + "External id": 294842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368057148.825, "dur": 43.786, + "args": { + "External id": 294843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368057213.710, "dur": 23.744, + "args": { + "External id": 294844,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368057254.098, "dur": 25.577, + "args": { + "External id": 294845,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368057294.098, "dur": 36.174, + "args": { + "External id": 294846,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368057353.715, "dur": 16.156, + "args": { + "External id": 294847,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368057384.435, "dur": 16.742, + "args": { + "External id": 294848,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057475.530, "dur": 14.218, + "args": { + "External id": 294849,"Record function id": 0, "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057478.790, "dur": 10.002, + "args": { + "External id": 294850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057482.832, "dur": 5.100, + "args": { + "External id": 294851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057484.077, "dur": 3.764, + "args": { + "External id": 294852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057496.378, "dur": 4.655, + "args": { + "External id": 294853,"Record function id": 0, "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057497.832, "dur": 2.784, + "args": { + "External id": 294854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057498.548, "dur": 1.605, + "args": { + "External id": 294855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057499.240, "dur": 0.812, + "args": { + "External id": 294856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057504.289, "dur": 4.090, + "args": { + "External id": 294857,"Record function id": 0, "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057505.561, "dur": 2.382, + "args": { + "External id": 294858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057506.067, "dur": 1.450, + "args": { + "External id": 294859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057506.597, "dur": 0.851, + "args": { + "External id": 294860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057511.591, "dur": 3.643, + "args": { + "External id": 294861,"Record function id": 0, "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057512.808, "dur": 2.027, + "args": { + "External id": 294862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057513.403, "dur": 0.998, + "args": { + "External id": 294863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057513.718, "dur": 0.597, + "args": { + "External id": 294864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057518.454, "dur": 4.992, + "args": { + "External id": 294865,"Record function id": 0, "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057519.401, "dur": 3.645, + "args": { + "External id": 294866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057519.863, "dur": 2.769, + "args": { + "External id": 294867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057520.294, "dur": 2.259, + "args": { + "External id": 294868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057526.667, "dur": 4.260, + "args": { + "External id": 294869,"Record function id": 0, "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057527.858, "dur": 2.638, + "args": { + "External id": 294870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057528.440, "dur": 1.655, + "args": { + "External id": 294871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057529.126, "dur": 0.894, + "args": { + "External id": 294872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057534.120, "dur": 3.515, + "args": { + "External id": 294873,"Record function id": 0, "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057535.282, "dur": 1.960, + "args": { + "External id": 294874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057535.738, "dur": 1.075, + "args": { + "External id": 294875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057536.333, "dur": 0.413, + "args": { + "External id": 294876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057540.719, "dur": 3.793, + "args": { + "External id": 294877,"Record function id": 0, "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057541.913, "dur": 2.191, + "args": { + "External id": 294878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057542.358, "dur": 1.343, + "args": { + "External id": 294879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057542.998, "dur": 0.629, + "args": { + "External id": 294880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057547.525, "dur": 3.634, + "args": { + "External id": 294881,"Record function id": 0, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368057548.667, "dur": 2.085, + "args": { + "External id": 294882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057549.100, "dur": 1.251, + "args": { + "External id": 294883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368057549.690, "dur": 0.588, + "args": { + "External id": 294884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368057554.897, "dur": 36991.527, + "args": { + "External id": 294885,"Record function id": 0, "Sequence number": 1209205, "Fwd thread id": 1, "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368057556.378, "dur": 36981.283, + "args": { + "External id": 294886,"Sequence number": 1209205, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1509 + } + }, + { + "ph": "f", "id": 27, "pid": 2070547, "tid": 2107622, "ts": 5333368057556.378, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2070547, "tid": 2107622, + "ts": 5333368057582.560, "dur": 84.118, + "args": { + "External id": 294887,"Record function id": 0, "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2070547, "tid": 2107622, + "ts": 5333368057677.965, "dur": 59.455, + "args": { + "External id": 294888,"Record function id": 0, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2070547, "tid": 2107622, + "ts": 5333368057743.339, "dur": 36786.263, + "args": { + "External id": 294889,"Record function id": 0, "Ev Idx": 1512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368057831.199, "dur": 6.898, + "args": { + "External id": 294890,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368057847.265, "dur": 4.821, + "args": { + "External id": 294891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368057866.683, "dur": 35863.541, + "args": { + "External id": 294892,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368057879.897, "dur": 35841.046, + "args": { + "External id": 294893,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368057928.865, "dur": 13.879, + "args": { + "External id": 294894,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368057948.953, "dur": 35733.756, + "args": { + "External id": 294895,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368057951.440, "dur": 35730.463, + "args": { + "External id": 294896,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368057954.544, "dur": 7.689, + "args": { + "External id": 294897,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368057963.659, "dur": 35714.787, + "args": { + "External id": 294898,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368093816.312, "dur": 9.189, + "args": { + "External id": 294899,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368093819.079, "dur": 6.010, + "args": { + "External id": 294900,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368093854.066, "dur": 375.815, + "args": { + "External id": 294901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368093878.814, "dur": 346.191, + "args": { + "External id": 294902,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1525, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368093891.223, "dur": 328.349, + "args": { + "External id": 294903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368094251.047, "dur": 2.181, + "args": { + "External id": 294904,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1527, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094311.539, "dur": 6.887, + "args": { + "External id": 294905,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094359.689, "dur": 1.334, + "args": { + "External id": 294906,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094375.680, "dur": 1.448, + "args": { + "External id": 294907,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094388.824, "dur": 2.356, + "args": { + "External id": 294908,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094401.945, "dur": 0.715, + "args": { + "External id": 294909,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094411.820, "dur": 0.779, + "args": { + "External id": 294910,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094423.230, "dur": 0.922, + "args": { + "External id": 294911,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094434.561, "dur": 2.858, + "args": { + "External id": 294912,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094468.874, "dur": 1.053, + "args": { + "External id": 294913,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368094559.330, "dur": 2660.016, + "args": { + "External id": 294914,"Record function id": 0, "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2070547, "tid": 2107622, + "ts": 5333368094577.981, "dur": 992.046, + "args": { + "External id": 294915,"Record function id": 0, "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070547, "tid": 2107622, + "ts": 5333368094592.828, "dur": 365.462, + "args": { + "External id": 294916,"Record function id": 0, "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094717.376, "dur": 4.759, + "args": { + "External id": 294917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094725.346, "dur": 0.943, + "args": { + "External id": 294918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094728.030, "dur": 0.720, + "args": { + "External id": 294919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094730.059, "dur": 0.902, + "args": { + "External id": 294920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094732.147, "dur": 2.098, + "args": { + "External id": 294921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094736.572, "dur": 0.565, + "args": { + "External id": 294922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094738.953, "dur": 1.984, + "args": { + "External id": 294923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094742.751, "dur": 0.827, + "args": { + "External id": 294924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094745.058, "dur": 0.563, + "args": { + "External id": 294925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368094747.435, "dur": 0.819, + "args": { + "External id": 294926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368094766.763, "dur": 162.178, + "args": { + "External id": 294927,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368094788.466, "dur": 135.640, + "args": { + "External id": 294928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368094812.039, "dur": 12.793, + "args": { + "External id": 294929,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368094828.459, "dur": 67.355, + "args": { + "External id": 294930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368094830.796, "dur": 64.646, + "args": { + "External id": 294931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368094834.492, "dur": 6.248, + "args": { + "External id": 294932,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368094842.234, "dur": 52.625, + "args": { + "External id": 294933,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2070547, "tid": 2107622, + "ts": 5333368095031.052, "dur": 530.235, + "args": { + "External id": 294934,"Record function id": 0, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070547, "tid": 2107622, + "ts": 5333368095046.638, "dur": 502.112, + "args": { + "External id": 294935,"Record function id": 0, "Ev Idx": 1558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368095100.527, "dur": 5.767, + "args": { + "External id": 294936,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368095121.575, "dur": 26.583, + "args": { + "External id": 294937,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095126.179, "dur": 1.390, + "args": { + "External id": 294938,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095128.979, "dur": 1.963, + "args": { + "External id": 294939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095132.446, "dur": 0.337, + "args": { + "External id": 294940,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095133.574, "dur": 0.170, + "args": { + "External id": 294941,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095136.180, "dur": 0.341, + "args": { + "External id": 294942,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095137.503, "dur": 0.139, + "args": { + "External id": 294943,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095138.871, "dur": 0.147, + "args": { + "External id": 294944,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095140.455, "dur": 1.670, + "args": { + "External id": 294945,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095143.434, "dur": 0.405, + "args": { + "External id": 294946,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368095157.490, "dur": 45.361, + "args": { + "External id": 294947,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368095235.341, "dur": 93.625, + "args": { + "External id": 294948,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368095245.708, "dur": 4.272, + "args": { + "External id": 294949,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368095255.039, "dur": 9.731, + "args": { + "External id": 294950,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368095259.090, "dur": 5.255, + "args": { + "External id": 294951,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095262.338, "dur": 0.572, + "args": { + "External id": 294952,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368095272.410, "dur": 20.982, + "args": { + "External id": 294953,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095274.263, "dur": 0.440, + "args": { + "External id": 294954,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095276.385, "dur": 0.410, + "args": { + "External id": 294955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095278.245, "dur": 0.451, + "args": { + "External id": 294956,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095279.760, "dur": 1.204, + "args": { + "External id": 294957,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095282.260, "dur": 1.797, + "args": { + "External id": 294958,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095284.788, "dur": 0.540, + "args": { + "External id": 294959,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095287.173, "dur": 0.347, + "args": { + "External id": 294960,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095288.428, "dur": 0.320, + "args": { + "External id": 294961,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368095289.938, "dur": 0.154, + "args": { + "External id": 294962,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368095304.078, "dur": 17.301, + "args": { + "External id": 294963,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368095371.900, "dur": 114.395, + "args": { + "External id": 294964,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368095396.713, "dur": 86.343, + "args": { + "External id": 294965,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1588, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368095405.922, "dur": 72.701, + "args": { + "External id": 294966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368095501.839, "dur": 1.749, + "args": { + "External id": 294967,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1590, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368095577.112, "dur": 1620.169, + "args": { + "External id": 294968,"Sequence number": 1209204, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1591 + } + }, + { + "ph": "f", "id": 28, "pid": 2070547, "tid": 2107622, "ts": 5333368095577.112, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368095720.618, "dur": 105.889, + "args": { + "External id": 294969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368095869.623, "dur": 36.268, + "args": { + "External id": 294970,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368095921.076, "dur": 47.093, + "args": { + "External id": 294971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368095977.246, "dur": 30.414, + "args": { + "External id": 294972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096015.113, "dur": 43.064, + "args": { + "External id": 294973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096064.621, "dur": 26.307, + "args": { + "External id": 294974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096097.293, "dur": 39.988, + "args": { + "External id": 294975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368096160.352, "dur": 38.568, + "args": { + "External id": 294976,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368096219.073, "dur": 27.744, + "args": { + "External id": 294977,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368096270.672, "dur": 16.917, + "args": { + "External id": 294978,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368096301.330, "dur": 13.014, + "args": { + "External id": 294979,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096323.232, "dur": 31.308, + "args": { + "External id": 294980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096357.686, "dur": 32.103, + "args": { + "External id": 294981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368096417.628, "dur": 166.827, + "args": { + "External id": 294982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368096493.230, "dur": 6.082, + "args": { + "External id": 294983,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368096501.215, "dur": 2.812, + "args": { + "External id": 294984,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368096665.431, "dur": 31.065, + "args": { + "External id": 294985,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368096711.040, "dur": 14.690, + "args": { + "External id": 294986,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096735.510, "dur": 43.087, + "args": { + "External id": 294987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096784.144, "dur": 35.294, + "args": { + "External id": 294988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096826.889, "dur": 25.214, + "args": { + "External id": 294989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096857.711, "dur": 28.766, + "args": { + "External id": 294990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096895.480, "dur": 21.327, + "args": { + "External id": 294991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368096922.635, "dur": 28.630, + "args": { + "External id": 294992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368096971.164, "dur": 21.252, + "args": { + "External id": 294993,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368097009.843, "dur": 25.153, + "args": { + "External id": 294994,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368097065.901, "dur": 24.804, + "args": { + "External id": 294995,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368097106.895, "dur": 14.235, + "args": { + "External id": 294996,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368097135.898, "dur": 13.672, + "args": { + "External id": 294997,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097241.413, "dur": 17.543, + "args": { + "External id": 294998,"Record function id": 0, "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097244.570, "dur": 13.294, + "args": { + "External id": 294999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097248.611, "dur": 8.365, + "args": { + "External id": 295000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097249.932, "dur": 6.949, + "args": { + "External id": 295001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097262.715, "dur": 4.936, + "args": { + "External id": 295002,"Record function id": 0, "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097264.037, "dur": 3.143, + "args": { + "External id": 295003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097265.105, "dur": 1.587, + "args": { + "External id": 295004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097265.605, "dur": 0.999, + "args": { + "External id": 295005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097270.866, "dur": 4.007, + "args": { + "External id": 295006,"Record function id": 0, "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097271.906, "dur": 2.549, + "args": { + "External id": 295007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097272.811, "dur": 1.217, + "args": { + "External id": 295008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097273.251, "dur": 0.683, + "args": { + "External id": 295009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097278.315, "dur": 4.146, + "args": { + "External id": 295010,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097279.552, "dur": 2.499, + "args": { + "External id": 295011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097280.531, "dur": 1.143, + "args": { + "External id": 295012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097280.839, "dur": 0.727, + "args": { + "External id": 295013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097285.982, "dur": 6.871, + "args": { + "External id": 295014,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097287.091, "dur": 5.327, + "args": { + "External id": 295015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097288.020, "dur": 3.993, + "args": { + "External id": 295016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097288.792, "dur": 3.161, + "args": { + "External id": 295017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097296.206, "dur": 4.862, + "args": { + "External id": 295018,"Record function id": 0, "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097297.460, "dur": 3.204, + "args": { + "External id": 295019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097298.319, "dur": 1.946, + "args": { + "External id": 295020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097299.252, "dur": 0.946, + "args": { + "External id": 295021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097304.417, "dur": 4.609, + "args": { + "External id": 295022,"Record function id": 0, "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097305.787, "dur": 2.810, + "args": { + "External id": 295023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097306.757, "dur": 1.442, + "args": { + "External id": 295024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097307.473, "dur": 0.662, + "args": { + "External id": 295025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097312.472, "dur": 7.149, + "args": { + "External id": 295026,"Record function id": 0, "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097316.691, "dur": 2.504, + "args": { + "External id": 295027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097317.516, "dur": 1.301, + "args": { + "External id": 295028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097318.095, "dur": 0.633, + "args": { + "External id": 295029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097322.916, "dur": 3.934, + "args": { + "External id": 295030,"Record function id": 0, "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368097324.125, "dur": 2.307, + "args": { + "External id": 295031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097324.888, "dur": 1.174, + "args": { + "External id": 295032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368097325.450, "dur": 0.542, + "args": { + "External id": 295033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368097330.804, "dur": 38198.995, + "args": { + "External id": 295034,"Record function id": 0, "Sequence number": 1209203, "Fwd thread id": 1, "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368097332.471, "dur": 38188.120, + "args": { + "External id": 295035,"Sequence number": 1209203, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1658 + } + }, + { + "ph": "f", "id": 29, "pid": 2070547, "tid": 2107622, "ts": 5333368097332.471, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2070547, "tid": 2107622, + "ts": 5333368097362.588, "dur": 39.570, + "args": { + "External id": 295036,"Record function id": 0, "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2070547, "tid": 2107622, + "ts": 5333368097410.178, "dur": 62.995, + "args": { + "External id": 295037,"Record function id": 0, "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2070547, "tid": 2107622, + "ts": 5333368097478.785, "dur": 38034.343, + "args": { + "External id": 295038,"Record function id": 0, "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368097567.932, "dur": 6.639, + "args": { + "External id": 295039,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368097583.408, "dur": 4.535, + "args": { + "External id": 295040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368097603.376, "dur": 37108.094, + "args": { + "External id": 295041,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368097616.290, "dur": 37084.466, + "args": { + "External id": 295042,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368097738.812, "dur": 15.112, + "args": { + "External id": 295043,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368097760.346, "dur": 36898.007, + "args": { + "External id": 295044,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368097762.782, "dur": 36894.494, + "args": { + "External id": 295045,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368097767.572, "dur": 6.895, + "args": { + "External id": 295046,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368097776.032, "dur": 36876.733, + "args": { + "External id": 295047,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368134813.883, "dur": 11.292, + "args": { + "External id": 295048,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368134817.133, "dur": 7.550, + "args": { + "External id": 295049,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368134856.816, "dur": 375.001, + "args": { + "External id": 295050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368134884.498, "dur": 341.679, + "args": { + "External id": 295051,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1674, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368134896.690, "dur": 323.364, + "args": { + "External id": 295052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368135253.407, "dur": 2.318, + "args": { + "External id": 295053,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1676, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135316.728, "dur": 6.746, + "args": { + "External id": 295054,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135365.613, "dur": 1.440, + "args": { + "External id": 295055,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135382.577, "dur": 1.226, + "args": { + "External id": 295056,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135394.075, "dur": 2.430, + "args": { + "External id": 295057,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135406.297, "dur": 0.724, + "args": { + "External id": 295058,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135415.556, "dur": 0.959, + "args": { + "External id": 295059,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135426.925, "dur": 0.872, + "args": { + "External id": 295060,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135437.703, "dur": 3.123, + "args": { + "External id": 295061,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135450.563, "dur": 0.629, + "args": { + "External id": 295062,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368135544.166, "dur": 2675.602, + "args": { + "External id": 295063,"Record function id": 0, "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2070547, "tid": 2107622, + "ts": 5333368135563.274, "dur": 1005.937, + "args": { + "External id": 295064,"Record function id": 0, "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070547, "tid": 2107622, + "ts": 5333368135578.615, "dur": 343.215, + "args": { + "External id": 295065,"Record function id": 0, "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135691.185, "dur": 4.608, + "args": { + "External id": 295066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135699.263, "dur": 0.848, + "args": { + "External id": 295067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135702.488, "dur": 0.675, + "args": { + "External id": 295068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135705.038, "dur": 0.625, + "args": { + "External id": 295069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135707.542, "dur": 2.213, + "args": { + "External id": 295070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135711.582, "dur": 0.739, + "args": { + "External id": 295071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135714.187, "dur": 1.407, + "args": { + "External id": 295072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135717.339, "dur": 0.508, + "args": { + "External id": 295073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135719.908, "dur": 0.581, + "args": { + "External id": 295074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368135722.006, "dur": 0.529, + "args": { + "External id": 295075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368135741.080, "dur": 151.027, + "args": { + "External id": 295076,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368135757.057, "dur": 129.808, + "args": { + "External id": 295077,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368135772.738, "dur": 13.040, + "args": { + "External id": 295078,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368135790.241, "dur": 67.911, + "args": { + "External id": 295079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368135794.189, "dur": 63.697, + "args": { + "External id": 295080,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368135798.033, "dur": 7.326, + "args": { + "External id": 295081,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368135806.934, "dur": 50.247, + "args": { + "External id": 295082,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2070547, "tid": 2107622, + "ts": 5333368135998.170, "dur": 563.826, + "args": { + "External id": 295083,"Record function id": 0, "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070547, "tid": 2107622, + "ts": 5333368136015.419, "dur": 533.974, + "args": { + "External id": 295084,"Record function id": 0, "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368136068.956, "dur": 6.211, + "args": { + "External id": 295085,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368136090.263, "dur": 27.865, + "args": { + "External id": 295086,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136094.281, "dur": 1.503, + "args": { + "External id": 295087,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136099.715, "dur": 0.270, + "args": { + "External id": 295088,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136101.375, "dur": 0.215, + "args": { + "External id": 295089,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136102.998, "dur": 0.219, + "args": { + "External id": 295090,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136105.766, "dur": 0.343, + "args": { + "External id": 295091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136107.436, "dur": 0.312, + "args": { + "External id": 295092,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136108.597, "dur": 1.661, + "args": { + "External id": 295093,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136111.366, "dur": 2.253, + "args": { + "External id": 295094,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136114.465, "dur": 0.373, + "args": { + "External id": 295095,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368136126.768, "dur": 30.293, + "args": { + "External id": 295096,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368136203.797, "dur": 122.461, + "args": { + "External id": 295097,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368136214.261, "dur": 5.244, + "args": { + "External id": 295098,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368136224.875, "dur": 10.075, + "args": { + "External id": 295099,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368136228.924, "dur": 5.595, + "args": { + "External id": 295100,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136232.281, "dur": 0.847, + "args": { + "External id": 295101,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368136242.224, "dur": 22.518, + "args": { + "External id": 295102,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136244.017, "dur": 1.636, + "args": { + "External id": 295103,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136247.189, "dur": 0.240, + "args": { + "External id": 295104,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136248.454, "dur": 0.371, + "args": { + "External id": 295105,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136251.157, "dur": 0.458, + "args": { + "External id": 295106,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136252.619, "dur": 1.604, + "args": { + "External id": 295107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136255.587, "dur": 0.307, + "args": { + "External id": 295108,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136257.655, "dur": 0.390, + "args": { + "External id": 295109,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136259.514, "dur": 0.383, + "args": { + "External id": 295110,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368136260.805, "dur": 0.804, + "args": { + "External id": 295111,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368136295.910, "dur": 22.737, + "args": { + "External id": 295112,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368136370.376, "dur": 111.335, + "args": { + "External id": 295113,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368136396.229, "dur": 82.298, + "args": { + "External id": 295114,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1737, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368136405.422, "dur": 69.062, + "args": { + "External id": 295115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368136496.369, "dur": 1.644, + "args": { + "External id": 295116,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1739, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368136579.219, "dur": 1614.475, + "args": { + "External id": 295117,"Sequence number": 1209202, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1740 + } + }, + { + "ph": "f", "id": 30, "pid": 2070547, "tid": 2107622, "ts": 5333368136579.219, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368136723.828, "dur": 106.055, + "args": { + "External id": 295118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368136873.849, "dur": 42.050, + "args": { + "External id": 295119,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368136931.730, "dur": 49.784, + "args": { + "External id": 295120,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368136990.817, "dur": 31.482, + "args": { + "External id": 295121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137030.646, "dur": 44.719, + "args": { + "External id": 295122,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137082.080, "dur": 27.866, + "args": { + "External id": 295123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137117.255, "dur": 40.983, + "args": { + "External id": 295124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368137203.970, "dur": 23.748, + "args": { + "External id": 295125,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368137245.854, "dur": 26.051, + "args": { + "External id": 295126,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368137296.347, "dur": 16.790, + "args": { + "External id": 295127,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368137327.065, "dur": 14.482, + "args": { + "External id": 295128,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137351.357, "dur": 31.724, + "args": { + "External id": 295129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137386.224, "dur": 32.394, + "args": { + "External id": 295130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368137454.144, "dur": 203.229, + "args": { + "External id": 295131,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368137528.389, "dur": 6.011, + "args": { + "External id": 295132,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368137536.662, "dur": 3.440, + "args": { + "External id": 295133,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368137691.598, "dur": 25.234, + "args": { + "External id": 295134,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368137729.569, "dur": 13.142, + "args": { + "External id": 295135,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137751.368, "dur": 40.099, + "args": { + "External id": 295136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137796.911, "dur": 34.401, + "args": { + "External id": 295137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137838.664, "dur": 23.315, + "args": { + "External id": 295138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137867.348, "dur": 28.696, + "args": { + "External id": 295139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137905.722, "dur": 20.130, + "args": { + "External id": 295140,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368137931.855, "dur": 29.292, + "args": { + "External id": 295141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368137989.310, "dur": 28.354, + "args": { + "External id": 295142,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368138039.429, "dur": 22.287, + "args": { + "External id": 295143,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368138075.699, "dur": 14.439, + "args": { + "External id": 295144,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368138104.930, "dur": 17.178, + "args": { + "External id": 295145,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368138135.766, "dur": 14.835, + "args": { + "External id": 295146,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138245.785, "dur": 14.021, + "args": { + "External id": 295147,"Record function id": 0, "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138248.787, "dur": 10.036, + "args": { + "External id": 295148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138252.973, "dur": 5.134, + "args": { + "External id": 295149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138254.181, "dur": 3.841, + "args": { + "External id": 295150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138263.783, "dur": 4.504, + "args": { + "External id": 295151,"Record function id": 0, "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138265.285, "dur": 2.568, + "args": { + "External id": 295152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138266.260, "dur": 1.129, + "args": { + "External id": 295153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138266.565, "dur": 0.754, + "args": { + "External id": 295154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138271.607, "dur": 4.203, + "args": { + "External id": 295155,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138273.062, "dur": 2.325, + "args": { + "External id": 295156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138273.820, "dur": 1.169, + "args": { + "External id": 295157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138274.199, "dur": 0.724, + "args": { + "External id": 295158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138279.036, "dur": 5.151, + "args": { + "External id": 295159,"Record function id": 0, "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138280.464, "dur": 3.270, + "args": { + "External id": 295160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138280.908, "dur": 2.411, + "args": { + "External id": 295161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138281.164, "dur": 2.088, + "args": { + "External id": 295162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138287.625, "dur": 4.204, + "args": { + "External id": 295163,"Record function id": 0, "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138289.089, "dur": 2.313, + "args": { + "External id": 295164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138289.692, "dur": 1.313, + "args": { + "External id": 295165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138290.074, "dur": 0.858, + "args": { + "External id": 295166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138294.994, "dur": 7.250, + "args": { + "External id": 295167,"Record function id": 0, "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138296.186, "dur": 5.649, + "args": { + "External id": 295168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138296.647, "dur": 4.786, + "args": { + "External id": 295169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138300.704, "dur": 0.654, + "args": { + "External id": 295170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138305.390, "dur": 3.794, + "args": { + "External id": 295171,"Record function id": 0, "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138306.744, "dur": 2.022, + "args": { + "External id": 295172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138307.188, "dur": 1.187, + "args": { + "External id": 295173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138307.628, "dur": 0.682, + "args": { + "External id": 295174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138312.467, "dur": 3.837, + "args": { + "External id": 295175,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138313.844, "dur": 2.046, + "args": { + "External id": 295176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138314.476, "dur": 1.003, + "args": { + "External id": 295177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138314.829, "dur": 0.587, + "args": { + "External id": 295178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138319.380, "dur": 3.440, + "args": { + "External id": 295179,"Record function id": 0, "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368138320.623, "dur": 1.737, + "args": { + "External id": 295180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138321.085, "dur": 0.884, + "args": { + "External id": 295181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368138321.456, "dur": 0.447, + "args": { + "External id": 295182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368138326.480, "dur": 35815.579, + "args": { + "External id": 295183,"Record function id": 0, "Sequence number": 1209201, "Fwd thread id": 1, "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368138327.843, "dur": 35805.524, + "args": { + "External id": 295184,"Sequence number": 1209201, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1807 + } + }, + { + "ph": "f", "id": 31, "pid": 2070547, "tid": 2107622, "ts": 5333368138327.843, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2070547, "tid": 2107622, + "ts": 5333368138357.260, "dur": 39.227, + "args": { + "External id": 295185,"Record function id": 0, "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2070547, "tid": 2107622, + "ts": 5333368138404.103, "dur": 61.401, + "args": { + "External id": 295186,"Record function id": 0, "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2070547, "tid": 2107622, + "ts": 5333368138471.195, "dur": 35653.922, + "args": { + "External id": 295187,"Record function id": 0, "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368138563.187, "dur": 6.851, + "args": { + "External id": 295188,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368138581.978, "dur": 4.576, + "args": { + "External id": 295189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368138601.008, "dur": 34710.016, + "args": { + "External id": 295190,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368138613.579, "dur": 34687.895, + "args": { + "External id": 295191,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368138746.540, "dur": 16.967, + "args": { + "External id": 295192,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368138772.896, "dur": 34491.142, + "args": { + "External id": 295193,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368138775.159, "dur": 34488.157, + "args": { + "External id": 295194,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368138778.193, "dur": 5.892, + "args": { + "External id": 295195,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368138785.656, "dur": 34474.155, + "args": { + "External id": 295196,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368173401.118, "dur": 8.629, + "args": { + "External id": 295197,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368173403.763, "dur": 5.629, + "args": { + "External id": 295198,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368173440.767, "dur": 400.819, + "args": { + "External id": 295199,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368173465.948, "dur": 370.631, + "args": { + "External id": 295200,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1823, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368173477.292, "dur": 353.612, + "args": { + "External id": 295201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368173860.928, "dur": 2.094, + "args": { + "External id": 295202,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1825, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368173923.227, "dur": 6.637, + "args": { + "External id": 295203,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368173972.773, "dur": 1.257, + "args": { + "External id": 295204,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368173989.956, "dur": 1.260, + "args": { + "External id": 295205,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174004.522, "dur": 0.617, + "args": { + "External id": 295206,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174015.417, "dur": 0.904, + "args": { + "External id": 295207,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174025.884, "dur": 0.975, + "args": { + "External id": 295208,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174038.236, "dur": 0.830, + "args": { + "External id": 295209,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174052.029, "dur": 1.615, + "args": { + "External id": 295210,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174063.946, "dur": 0.641, + "args": { + "External id": 295211,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368174155.841, "dur": 2676.131, + "args": { + "External id": 295212,"Record function id": 0, "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2070547, "tid": 2107622, + "ts": 5333368174191.543, "dur": 968.041, + "args": { + "External id": 295213,"Record function id": 0, "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070547, "tid": 2107622, + "ts": 5333368174207.538, "dur": 310.828, + "args": { + "External id": 295214,"Record function id": 0, "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174289.219, "dur": 4.741, + "args": { + "External id": 295215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174297.458, "dur": 0.902, + "args": { + "External id": 295216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174300.482, "dur": 0.507, + "args": { + "External id": 295217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174303.056, "dur": 1.978, + "args": { + "External id": 295218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174307.036, "dur": 0.825, + "args": { + "External id": 295219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174309.551, "dur": 0.698, + "args": { + "External id": 295220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174311.860, "dur": 1.514, + "args": { + "External id": 295221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174315.541, "dur": 0.562, + "args": { + "External id": 295222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174317.864, "dur": 0.691, + "args": { + "External id": 295223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368174320.209, "dur": 0.661, + "args": { + "External id": 295224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368174338.837, "dur": 151.729, + "args": { + "External id": 295225,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368174354.614, "dur": 131.287, + "args": { + "External id": 295226,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368174370.329, "dur": 14.057, + "args": { + "External id": 295227,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368174388.537, "dur": 68.467, + "args": { + "External id": 295228,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368174391.032, "dur": 65.671, + "args": { + "External id": 295229,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174394.216, "dur": 8.873, + "args": { + "External id": 295230,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368174405.119, "dur": 51.013, + "args": { + "External id": 295231,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2070547, "tid": 2107622, + "ts": 5333368174594.090, "dur": 557.191, + "args": { + "External id": 295232,"Record function id": 0, "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070547, "tid": 2107622, + "ts": 5333368174610.131, "dur": 528.797, + "args": { + "External id": 295233,"Record function id": 0, "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368174706.639, "dur": 6.276, + "args": { + "External id": 295234,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368174728.405, "dur": 28.657, + "args": { + "External id": 295235,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174732.702, "dur": 1.469, + "args": { + "External id": 295236,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174735.840, "dur": 1.850, + "args": { + "External id": 295237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174738.881, "dur": 0.426, + "args": { + "External id": 295238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174740.810, "dur": 0.178, + "args": { + "External id": 295239,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174743.707, "dur": 0.406, + "args": { + "External id": 295240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174745.297, "dur": 0.389, + "args": { + "External id": 295241,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174746.880, "dur": 1.906, + "args": { + "External id": 295242,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174751.143, "dur": 0.257, + "args": { + "External id": 295243,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174752.708, "dur": 0.341, + "args": { + "External id": 295244,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368174767.185, "dur": 32.007, + "args": { + "External id": 295245,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368174830.355, "dur": 91.556, + "args": { + "External id": 295246,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368174839.918, "dur": 4.090, + "args": { + "External id": 295247,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368174848.493, "dur": 9.723, + "args": { + "External id": 295248,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368174852.451, "dur": 5.347, + "args": { + "External id": 295249,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174856.030, "dur": 0.573, + "args": { + "External id": 295250,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368174865.217, "dur": 21.127, + "args": { + "External id": 295251,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174866.988, "dur": 0.370, + "args": { + "External id": 295252,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174869.744, "dur": 0.460, + "args": { + "External id": 295253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174871.164, "dur": 0.478, + "args": { + "External id": 295254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174873.291, "dur": 1.776, + "args": { + "External id": 295255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174875.936, "dur": 0.261, + "args": { + "External id": 295256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174877.416, "dur": 0.390, + "args": { + "External id": 295257,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174880.024, "dur": 0.329, + "args": { + "External id": 295258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174881.697, "dur": 0.375, + "args": { + "External id": 295259,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368174883.091, "dur": 0.349, + "args": { + "External id": 295260,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368174896.117, "dur": 18.398, + "args": { + "External id": 295261,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368174964.365, "dur": 112.492, + "args": { + "External id": 295262,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368174989.205, "dur": 84.324, + "args": { + "External id": 295263,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1886, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368174998.379, "dur": 70.847, + "args": { + "External id": 295264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368175090.827, "dur": 1.617, + "args": { + "External id": 295265,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1888, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368175182.730, "dur": 1626.653, + "args": { + "External id": 295266,"Sequence number": 1209200, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1889 + } + }, + { + "ph": "f", "id": 32, "pid": 2070547, "tid": 2107622, "ts": 5333368175182.730, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368175292.537, "dur": 108.752, + "args": { + "External id": 295267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368175446.307, "dur": 42.557, + "args": { + "External id": 295268,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368175508.390, "dur": 51.944, + "args": { + "External id": 295269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368175570.036, "dur": 36.947, + "args": { + "External id": 295270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368175614.934, "dur": 92.245, + "args": { + "External id": 295271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368175718.042, "dur": 29.586, + "args": { + "External id": 295272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368175754.491, "dur": 44.085, + "args": { + "External id": 295273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368175824.561, "dur": 23.651, + "args": { + "External id": 295274,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368175867.691, "dur": 26.870, + "args": { + "External id": 295275,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368175915.106, "dur": 18.475, + "args": { + "External id": 295276,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368175947.899, "dur": 15.582, + "args": { + "External id": 295277,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368175971.831, "dur": 29.281, + "args": { + "External id": 295278,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368176004.325, "dur": 32.264, + "args": { + "External id": 295279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368176064.407, "dur": 184.135, + "args": { + "External id": 295280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368176140.058, "dur": 5.752, + "args": { + "External id": 295281,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368176147.647, "dur": 3.417, + "args": { + "External id": 295282,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368176280.435, "dur": 25.168, + "args": { + "External id": 295283,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368176317.152, "dur": 13.385, + "args": { + "External id": 295284,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368176338.564, "dur": 37.694, + "args": { + "External id": 295285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368176381.546, "dur": 35.061, + "args": { + "External id": 295286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368176423.995, "dur": 19.614, + "args": { + "External id": 295287,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368176447.737, "dur": 29.533, + "args": { + "External id": 295288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368176482.559, "dur": 18.936, + "args": { + "External id": 295289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368176507.806, "dur": 28.408, + "args": { + "External id": 295290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368176551.890, "dur": 20.224, + "args": { + "External id": 295291,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368176588.690, "dur": 21.644, + "args": { + "External id": 295292,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368176660.140, "dur": 39.621, + "args": { + "External id": 295293,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368176732.869, "dur": 15.663, + "args": { + "External id": 295294,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368176761.861, "dur": 16.703, + "args": { + "External id": 295295,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176853.212, "dur": 18.440, + "args": { + "External id": 295296,"Record function id": 0, "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176859.955, "dur": 10.692, + "args": { + "External id": 295297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176864.133, "dur": 5.779, + "args": { + "External id": 295298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176865.793, "dur": 4.029, + "args": { + "External id": 295299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176875.579, "dur": 4.244, + "args": { + "External id": 295300,"Record function id": 0, "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176876.719, "dur": 2.669, + "args": { + "External id": 295301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176877.737, "dur": 1.175, + "args": { + "External id": 295302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176878.089, "dur": 0.711, + "args": { + "External id": 295303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176883.052, "dur": 5.404, + "args": { + "External id": 295304,"Record function id": 0, "Ev Idx": 1927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176884.234, "dur": 3.791, + "args": { + "External id": 295305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176884.898, "dur": 2.546, + "args": { + "External id": 295306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176885.318, "dur": 2.056, + "args": { + "External id": 295307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176891.607, "dur": 4.043, + "args": { + "External id": 295308,"Record function id": 0, "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176892.565, "dur": 2.677, + "args": { + "External id": 295309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176893.220, "dur": 1.578, + "args": { + "External id": 295310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176893.866, "dur": 0.870, + "args": { + "External id": 295311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176898.695, "dur": 4.223, + "args": { + "External id": 295312,"Record function id": 0, "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176899.587, "dur": 2.912, + "args": { + "External id": 295313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176900.147, "dur": 1.825, + "args": { + "External id": 295314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176901.159, "dur": 0.738, + "args": { + "External id": 295315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176906.065, "dur": 4.289, + "args": { + "External id": 295316,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176907.264, "dur": 2.692, + "args": { + "External id": 295317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176907.893, "dur": 1.563, + "args": { + "External id": 295318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176908.639, "dur": 0.753, + "args": { + "External id": 295319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176913.861, "dur": 4.113, + "args": { + "External id": 295320,"Record function id": 0, "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176914.995, "dur": 2.569, + "args": { + "External id": 295321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176915.580, "dur": 1.573, + "args": { + "External id": 295322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176916.207, "dur": 0.835, + "args": { + "External id": 295323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176921.534, "dur": 3.923, + "args": { + "External id": 295324,"Record function id": 0, "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176922.436, "dur": 2.618, + "args": { + "External id": 295325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176922.977, "dur": 1.683, + "args": { + "External id": 295326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176923.873, "dur": 0.716, + "args": { + "External id": 295327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176928.530, "dur": 3.955, + "args": { + "External id": 295328,"Record function id": 0, "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368176929.772, "dur": 2.319, + "args": { + "External id": 295329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176930.302, "dur": 1.351, + "args": { + "External id": 295330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368176930.867, "dur": 0.716, + "args": { + "External id": 295331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368176936.377, "dur": 36330.820, + "args": { + "External id": 295332,"Record function id": 0, "Sequence number": 1209199, "Fwd thread id": 1, "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368176937.730, "dur": 36320.006, + "args": { + "External id": 295333,"Sequence number": 1209199, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1956 + } + }, + { + "ph": "f", "id": 33, "pid": 2070547, "tid": 2107622, "ts": 5333368176937.730, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2070547, "tid": 2107622, + "ts": 5333368176968.853, "dur": 36.482, + "args": { + "External id": 295334,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2070547, "tid": 2107622, + "ts": 5333368177012.897, "dur": 61.782, + "args": { + "External id": 295335,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2070547, "tid": 2107622, + "ts": 5333368177081.289, "dur": 36168.823, + "args": { + "External id": 295336,"Record function id": 0, "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368177187.932, "dur": 7.913, + "args": { + "External id": 295337,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368177207.006, "dur": 6.178, + "args": { + "External id": 295338,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368177228.682, "dur": 35175.926, + "args": { + "External id": 295339,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368177245.624, "dur": 35150.099, + "args": { + "External id": 295340,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368177298.368, "dur": 17.053, + "args": { + "External id": 295341,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368177321.550, "dur": 35037.283, + "args": { + "External id": 295342,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368177325.369, "dur": 35032.762, + "args": { + "External id": 295343,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368177328.863, "dur": 4.933, + "args": { + "External id": 295344,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368177335.782, "dur": 35018.812, + "args": { + "External id": 295345,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368212491.799, "dur": 9.809, + "args": { + "External id": 295346,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368212494.830, "dur": 6.333, + "args": { + "External id": 295347,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368212528.721, "dur": 418.546, + "args": { + "External id": 295348,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368212555.162, "dur": 386.199, + "args": { + "External id": 295349,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1972, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368212566.762, "dur": 368.785, + "args": { + "External id": 295350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368212968.207, "dur": 2.323, + "args": { + "External id": 295351,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1974, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213032.709, "dur": 6.377, + "args": { + "External id": 295352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213082.538, "dur": 1.256, + "args": { + "External id": 295353,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213098.922, "dur": 2.730, + "args": { + "External id": 295354,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213112.762, "dur": 0.884, + "args": { + "External id": 295355,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213124.050, "dur": 1.073, + "args": { + "External id": 295356,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213134.919, "dur": 0.629, + "args": { + "External id": 295357,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213145.739, "dur": 2.872, + "args": { + "External id": 295358,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213158.411, "dur": 1.626, + "args": { + "External id": 295359,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213186.280, "dur": 1.396, + "args": { + "External id": 295360,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368213282.339, "dur": 2711.427, + "args": { + "External id": 295361,"Record function id": 0, "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2070547, "tid": 2107622, + "ts": 5333368213300.175, "dur": 993.616, + "args": { + "External id": 295362,"Record function id": 0, "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070547, "tid": 2107622, + "ts": 5333368213314.880, "dur": 292.235, + "args": { + "External id": 295363,"Record function id": 0, "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213388.629, "dur": 3.898, + "args": { + "External id": 295364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213395.702, "dur": 0.767, + "args": { + "External id": 295365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213398.336, "dur": 2.671, + "args": { + "External id": 295366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213402.546, "dur": 0.670, + "args": { + "External id": 295367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213405.010, "dur": 0.511, + "args": { + "External id": 295368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213407.093, "dur": 0.511, + "args": { + "External id": 295369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213409.106, "dur": 1.460, + "args": { + "External id": 295370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213411.966, "dur": 0.748, + "args": { + "External id": 295371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213414.554, "dur": 0.509, + "args": { + "External id": 295372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368213416.782, "dur": 0.507, + "args": { + "External id": 295373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368213434.880, "dur": 145.270, + "args": { + "External id": 295374,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368213449.938, "dur": 125.679, + "args": { + "External id": 295375,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368213463.013, "dur": 14.920, + "args": { + "External id": 295376,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368213481.980, "dur": 65.833, + "args": { + "External id": 295377,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368213485.855, "dur": 61.635, + "args": { + "External id": 295378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213489.042, "dur": 6.458, + "args": { + "External id": 295379,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368213497.246, "dur": 49.652, + "args": { + "External id": 295380,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2070547, "tid": 2107622, + "ts": 5333368213736.125, "dur": 550.124, + "args": { + "External id": 295381,"Record function id": 0, "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070547, "tid": 2107622, + "ts": 5333368213753.050, "dur": 520.448, + "args": { + "External id": 295382,"Record function id": 0, "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368213809.777, "dur": 6.172, + "args": { + "External id": 295383,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368213831.647, "dur": 28.760, + "args": { + "External id": 295384,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213836.015, "dur": 1.527, + "args": { + "External id": 295385,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213841.136, "dur": 0.398, + "args": { + "External id": 295386,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213842.448, "dur": 0.424, + "args": { + "External id": 295387,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213844.590, "dur": 0.537, + "args": { + "External id": 295388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213846.870, "dur": 0.442, + "args": { + "External id": 295389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213849.003, "dur": 2.167, + "args": { + "External id": 295390,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213852.305, "dur": 1.502, + "args": { + "External id": 295391,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213855.139, "dur": 0.277, + "args": { + "External id": 295392,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213856.232, "dur": 0.371, + "args": { + "External id": 295393,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368213869.643, "dur": 33.969, + "args": { + "External id": 295394,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368213935.138, "dur": 96.577, + "args": { + "External id": 295395,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368213944.824, "dur": 4.472, + "args": { + "External id": 295396,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368213954.095, "dur": 9.277, + "args": { + "External id": 295397,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368213958.119, "dur": 4.844, + "args": { + "External id": 295398,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213961.038, "dur": 0.627, + "args": { + "External id": 295399,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368213969.741, "dur": 23.616, + "args": { + "External id": 295400,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213971.455, "dur": 0.503, + "args": { + "External id": 295401,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213973.270, "dur": 1.610, + "args": { + "External id": 295402,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213976.342, "dur": 1.782, + "args": { + "External id": 295403,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213979.077, "dur": 0.516, + "args": { + "External id": 295404,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213982.629, "dur": 0.360, + "args": { + "External id": 295405,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213984.130, "dur": 0.322, + "args": { + "External id": 295406,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213985.789, "dur": 0.211, + "args": { + "External id": 295407,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213988.083, "dur": 0.373, + "args": { + "External id": 295408,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368213989.669, "dur": 0.392, + "args": { + "External id": 295409,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368214004.393, "dur": 19.988, + "args": { + "External id": 295410,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368214072.414, "dur": 130.023, + "args": { + "External id": 295411,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368214095.655, "dur": 103.010, + "args": { + "External id": 295412,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2035, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368214104.719, "dur": 89.111, + "args": { + "External id": 295413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368214218.502, "dur": 1.962, + "args": { + "External id": 295414,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2037, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368214301.342, "dur": 1669.253, + "args": { + "External id": 295415,"Sequence number": 1209198, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2038 + } + }, + { + "ph": "f", "id": 34, "pid": 2070547, "tid": 2107622, "ts": 5333368214301.342, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368214409.047, "dur": 103.595, + "args": { + "External id": 295416,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368214550.126, "dur": 35.135, + "args": { + "External id": 295417,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368214600.034, "dur": 94.489, + "args": { + "External id": 295418,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368214713.559, "dur": 35.638, + "args": { + "External id": 295419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368214755.905, "dur": 45.028, + "args": { + "External id": 295420,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368214807.356, "dur": 26.796, + "args": { + "External id": 295421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368214841.651, "dur": 41.293, + "args": { + "External id": 295422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368214911.734, "dur": 25.160, + "args": { + "External id": 295423,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368214960.269, "dur": 28.166, + "args": { + "External id": 295424,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368215040.952, "dur": 21.092, + "args": { + "External id": 295425,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368215076.525, "dur": 15.679, + "args": { + "External id": 295426,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215105.533, "dur": 31.389, + "args": { + "External id": 295427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215140.288, "dur": 50.348, + "args": { + "External id": 295428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368215224.418, "dur": 168.800, + "args": { + "External id": 295429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368215302.233, "dur": 7.526, + "args": { + "External id": 295430,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368215311.539, "dur": 2.704, + "args": { + "External id": 295431,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368215426.295, "dur": 24.774, + "args": { + "External id": 295432,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368215462.840, "dur": 13.063, + "args": { + "External id": 295433,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215483.384, "dur": 38.273, + "args": { + "External id": 295434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215526.995, "dur": 35.436, + "args": { + "External id": 295435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215570.758, "dur": 21.579, + "args": { + "External id": 295436,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215596.319, "dur": 67.631, + "args": { + "External id": 295437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215674.254, "dur": 24.577, + "args": { + "External id": 295438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368215708.416, "dur": 49.490, + "args": { + "External id": 295439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368215785.395, "dur": 25.390, + "args": { + "External id": 295440,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368215828.354, "dur": 23.085, + "args": { + "External id": 295441,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368215865.884, "dur": 20.844, + "args": { + "External id": 295442,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368215900.194, "dur": 13.249, + "args": { + "External id": 295443,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368215926.800, "dur": 15.218, + "args": { + "External id": 295444,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216015.157, "dur": 14.556, + "args": { + "External id": 295445,"Record function id": 0, "Ev Idx": 2068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216018.116, "dur": 10.533, + "args": { + "External id": 295446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216022.121, "dur": 5.608, + "args": { + "External id": 295447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216023.582, "dur": 4.017, + "args": { + "External id": 295448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216033.551, "dur": 6.110, + "args": { + "External id": 295449,"Record function id": 0, "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216034.742, "dur": 4.441, + "args": { + "External id": 295450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216035.752, "dur": 2.944, + "args": { + "External id": 295451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216036.250, "dur": 2.359, + "args": { + "External id": 295452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216043.016, "dur": 4.212, + "args": { + "External id": 295453,"Record function id": 0, "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216044.197, "dur": 2.618, + "args": { + "External id": 295454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216044.840, "dur": 1.535, + "args": { + "External id": 295455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216045.335, "dur": 0.958, + "args": { + "External id": 295456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216050.441, "dur": 4.325, + "args": { + "External id": 295457,"Record function id": 0, "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216051.599, "dur": 2.757, + "args": { + "External id": 295458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216052.292, "dur": 1.574, + "args": { + "External id": 295459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216053.125, "dur": 0.671, + "args": { + "External id": 295460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216058.169, "dur": 3.992, + "args": { + "External id": 295461,"Record function id": 0, "Ev Idx": 2084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216059.315, "dur": 2.413, + "args": { + "External id": 295462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216059.801, "dur": 1.473, + "args": { + "External id": 295463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216060.608, "dur": 0.592, + "args": { + "External id": 295464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216065.528, "dur": 4.318, + "args": { + "External id": 295465,"Record function id": 0, "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216066.630, "dur": 2.803, + "args": { + "External id": 295466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216067.313, "dur": 1.686, + "args": { + "External id": 295467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216068.282, "dur": 0.633, + "args": { + "External id": 295468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216073.113, "dur": 4.241, + "args": { + "External id": 295469,"Record function id": 0, "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216074.379, "dur": 2.589, + "args": { + "External id": 295470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216075.165, "dur": 1.356, + "args": { + "External id": 295471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216075.719, "dur": 0.737, + "args": { + "External id": 295472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216080.565, "dur": 4.059, + "args": { + "External id": 295473,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216081.730, "dur": 2.500, + "args": { + "External id": 295474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216082.610, "dur": 1.186, + "args": { + "External id": 295475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216083.119, "dur": 0.604, + "args": { + "External id": 295476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216087.764, "dur": 5.390, + "args": { + "External id": 295477,"Record function id": 0, "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368216089.041, "dur": 3.714, + "args": { + "External id": 295478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216089.464, "dur": 2.848, + "args": { + "External id": 295479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368216089.991, "dur": 2.247, + "args": { + "External id": 295480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368216097.095, "dur": 36193.999, + "args": { + "External id": 295481,"Record function id": 0, "Sequence number": 1209197, "Fwd thread id": 1, "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368216098.532, "dur": 36184.191, + "args": { + "External id": 295482,"Sequence number": 1209197, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2105 + } + }, + { + "ph": "f", "id": 35, "pid": 2070547, "tid": 2107622, "ts": 5333368216098.532, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2070547, "tid": 2107622, + "ts": 5333368216126.069, "dur": 36.246, + "args": { + "External id": 295483,"Record function id": 0, "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2070547, "tid": 2107622, + "ts": 5333368216187.513, "dur": 62.643, + "args": { + "External id": 295484,"Record function id": 0, "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2070547, "tid": 2107622, + "ts": 5333368216256.467, "dur": 36018.160, + "args": { + "External id": 295485,"Record function id": 0, "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368216348.455, "dur": 7.552, + "args": { + "External id": 295486,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368216365.801, "dur": 4.689, + "args": { + "External id": 295487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368216384.703, "dur": 35023.038, + "args": { + "External id": 295488,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368216397.908, "dur": 35000.464, + "args": { + "External id": 295489,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368216482.268, "dur": 14.004, + "args": { + "External id": 295490,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368216502.520, "dur": 34857.841, + "args": { + "External id": 295491,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368216505.253, "dur": 34854.319, + "args": { + "External id": 295492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368216508.577, "dur": 5.464, + "args": { + "External id": 295493,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368216515.590, "dur": 34840.374, + "args": { + "External id": 295494,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368251495.548, "dur": 9.531, + "args": { + "External id": 295495,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368251498.235, "dur": 6.480, + "args": { + "External id": 295496,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368251532.187, "dur": 440.195, + "args": { + "External id": 295497,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368251559.898, "dur": 406.716, + "args": { + "External id": 295498,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2121, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368251570.392, "dur": 390.075, + "args": { + "External id": 295499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368251992.979, "dur": 2.225, + "args": { + "External id": 295500,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2123, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252055.891, "dur": 6.478, + "args": { + "External id": 295501,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252104.383, "dur": 3.053, + "args": { + "External id": 295502,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252122.481, "dur": 1.175, + "args": { + "External id": 295503,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252134.692, "dur": 0.655, + "args": { + "External id": 295504,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252147.272, "dur": 0.777, + "args": { + "External id": 295505,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252157.233, "dur": 2.437, + "args": { + "External id": 295506,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252188.371, "dur": 1.525, + "args": { + "External id": 295507,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252202.115, "dur": 1.903, + "args": { + "External id": 295508,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252213.342, "dur": 0.771, + "args": { + "External id": 295509,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368252305.890, "dur": 2632.319, + "args": { + "External id": 295510,"Record function id": 0, "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2070547, "tid": 2107622, + "ts": 5333368252324.747, "dur": 960.655, + "args": { + "External id": 295511,"Record function id": 0, "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070547, "tid": 2107622, + "ts": 5333368252338.771, "dur": 329.022, + "args": { + "External id": 295512,"Record function id": 0, "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252414.739, "dur": 5.102, + "args": { + "External id": 295513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252423.199, "dur": 0.981, + "args": { + "External id": 295514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252426.256, "dur": 0.751, + "args": { + "External id": 295515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252428.747, "dur": 0.778, + "args": { + "External id": 295516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252431.247, "dur": 0.866, + "args": { + "External id": 295517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252433.513, "dur": 0.967, + "args": { + "External id": 295518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252436.085, "dur": 1.622, + "args": { + "External id": 295519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252439.220, "dur": 0.829, + "args": { + "External id": 295520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252441.590, "dur": 2.056, + "args": { + "External id": 295521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368252445.436, "dur": 0.607, + "args": { + "External id": 295522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368252463.058, "dur": 139.265, + "args": { + "External id": 295523,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368252478.777, "dur": 119.211, + "args": { + "External id": 295524,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368252491.707, "dur": 12.300, + "args": { + "External id": 295525,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368252507.838, "dur": 63.145, + "args": { + "External id": 295526,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368252510.473, "dur": 60.245, + "args": { + "External id": 295527,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252514.211, "dur": 5.369, + "args": { + "External id": 295528,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368252521.049, "dur": 49.129, + "args": { + "External id": 295529,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2070547, "tid": 2107622, + "ts": 5333368252745.369, "dur": 532.369, + "args": { + "External id": 295530,"Record function id": 0, "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070547, "tid": 2107622, + "ts": 5333368252760.729, "dur": 504.927, + "args": { + "External id": 295531,"Record function id": 0, "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368252816.275, "dur": 6.269, + "args": { + "External id": 295532,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368252837.285, "dur": 28.379, + "args": { + "External id": 295533,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252842.486, "dur": 1.463, + "args": { + "External id": 295534,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252845.895, "dur": 0.431, + "args": { + "External id": 295535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252847.613, "dur": 0.888, + "args": { + "External id": 295536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252850.115, "dur": 2.805, + "args": { + "External id": 295537,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252854.157, "dur": 0.424, + "args": { + "External id": 295538,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252855.805, "dur": 0.339, + "args": { + "External id": 295539,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252857.441, "dur": 0.579, + "args": { + "External id": 295540,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252859.598, "dur": 0.330, + "args": { + "External id": 295541,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252861.125, "dur": 0.396, + "args": { + "External id": 295542,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368252874.843, "dur": 33.012, + "args": { + "External id": 295543,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368252937.093, "dur": 93.639, + "args": { + "External id": 295544,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368252946.405, "dur": 3.393, + "args": { + "External id": 295545,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368252954.540, "dur": 9.320, + "args": { + "External id": 295546,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368252958.608, "dur": 4.812, + "args": { + "External id": 295547,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252961.907, "dur": 0.422, + "args": { + "External id": 295548,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368252969.880, "dur": 25.703, + "args": { + "External id": 295549,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252972.605, "dur": 1.444, + "args": { + "External id": 295550,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252975.575, "dur": 0.279, + "args": { + "External id": 295551,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252977.222, "dur": 0.810, + "args": { + "External id": 295552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252979.531, "dur": 0.316, + "args": { + "External id": 295553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252981.450, "dur": 0.356, + "args": { + "External id": 295554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252983.629, "dur": 0.477, + "args": { + "External id": 295555,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252985.068, "dur": 1.592, + "args": { + "External id": 295556,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252988.181, "dur": 0.284, + "args": { + "External id": 295557,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368252990.447, "dur": 1.874, + "args": { + "External id": 295558,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368253005.704, "dur": 17.912, + "args": { + "External id": 295559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368253072.419, "dur": 126.209, + "args": { + "External id": 295560,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368253094.925, "dur": 100.120, + "args": { + "External id": 295561,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2184, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368253103.757, "dur": 86.337, + "args": { + "External id": 295562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368253214.008, "dur": 2.093, + "args": { + "External id": 295563,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2186, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368253292.399, "dur": 1623.129, + "args": { + "External id": 295564,"Sequence number": 1209196, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2187 + } + }, + { + "ph": "f", "id": 36, "pid": 2070547, "tid": 2107622, "ts": 5333368253292.399, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368253396.476, "dur": 101.397, + "args": { + "External id": 295565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368253535.198, "dur": 36.031, + "args": { + "External id": 295566,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368253588.333, "dur": 89.600, + "args": { + "External id": 295567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368253691.223, "dur": 36.644, + "args": { + "External id": 295568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368253734.162, "dur": 45.630, + "args": { + "External id": 295569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368253786.348, "dur": 27.040, + "args": { + "External id": 295570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368253823.182, "dur": 41.489, + "args": { + "External id": 295571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368253889.493, "dur": 24.343, + "args": { + "External id": 295572,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368253931.420, "dur": 26.329, + "args": { + "External id": 295573,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368253977.472, "dur": 18.445, + "args": { + "External id": 295574,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368254008.933, "dur": 13.736, + "args": { + "External id": 295575,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254032.114, "dur": 30.999, + "args": { + "External id": 295576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254066.205, "dur": 31.910, + "args": { + "External id": 295577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368254129.146, "dur": 189.878, + "args": { + "External id": 295578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368254227.623, "dur": 6.770, + "args": { + "External id": 295579,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368254236.717, "dur": 3.132, + "args": { + "External id": 295580,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368254355.148, "dur": 23.824, + "args": { + "External id": 295581,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368254391.351, "dur": 17.638, + "args": { + "External id": 295582,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254416.891, "dur": 43.161, + "args": { + "External id": 295583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254467.061, "dur": 34.942, + "args": { + "External id": 295584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254508.548, "dur": 24.808, + "args": { + "External id": 295585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254537.781, "dur": 30.041, + "args": { + "External id": 295586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254573.561, "dur": 19.228, + "args": { + "External id": 295587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368254600.344, "dur": 79.328, + "args": { + "External id": 295588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368254701.774, "dur": 26.916, + "args": { + "External id": 295589,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368254746.231, "dur": 23.708, + "args": { + "External id": 295590,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368254799.536, "dur": 24.860, + "args": { + "External id": 295591,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368254840.546, "dur": 14.717, + "args": { + "External id": 295592,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368254867.544, "dur": 16.621, + "args": { + "External id": 295593,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368254963.214, "dur": 16.643, + "args": { + "External id": 295594,"Record function id": 0, "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368254966.505, "dur": 12.327, + "args": { + "External id": 295595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368254970.599, "dur": 7.306, + "args": { + "External id": 295596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368254971.979, "dur": 5.840, + "args": { + "External id": 295597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368254983.618, "dur": 4.883, + "args": { + "External id": 295598,"Record function id": 0, "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368254985.174, "dur": 2.867, + "args": { + "External id": 295599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368254985.887, "dur": 1.658, + "args": { + "External id": 295600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368254986.390, "dur": 1.091, + "args": { + "External id": 295601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368254991.802, "dur": 3.855, + "args": { + "External id": 295602,"Record function id": 0, "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368254992.844, "dur": 2.369, + "args": { + "External id": 295603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368254993.597, "dur": 1.160, + "args": { + "External id": 295604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368254993.986, "dur": 0.692, + "args": { + "External id": 295605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368254999.022, "dur": 3.955, + "args": { + "External id": 295606,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255000.027, "dur": 2.501, + "args": { + "External id": 295607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255000.690, "dur": 1.414, + "args": { + "External id": 295608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255001.403, "dur": 0.635, + "args": { + "External id": 295609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255006.133, "dur": 3.822, + "args": { + "External id": 295610,"Record function id": 0, "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255007.213, "dur": 2.301, + "args": { + "External id": 295611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255007.734, "dur": 1.348, + "args": { + "External id": 295612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255008.325, "dur": 0.693, + "args": { + "External id": 295613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255012.997, "dur": 4.559, + "args": { + "External id": 295614,"Record function id": 0, "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255014.347, "dur": 2.800, + "args": { + "External id": 295615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255015.283, "dur": 1.453, + "args": { + "External id": 295616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255015.857, "dur": 0.812, + "args": { + "External id": 295617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255020.824, "dur": 3.610, + "args": { + "External id": 295618,"Record function id": 0, "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255022.021, "dur": 1.955, + "args": { + "External id": 295619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255022.462, "dur": 1.078, + "args": { + "External id": 295620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255022.879, "dur": 0.596, + "args": { + "External id": 295621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255027.559, "dur": 5.663, + "args": { + "External id": 295622,"Record function id": 0, "Ev Idx": 2245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255028.574, "dur": 4.221, + "args": { + "External id": 295623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255029.143, "dur": 3.247, + "args": { + "External id": 295624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255029.600, "dur": 2.697, + "args": { + "External id": 295625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255036.588, "dur": 3.390, + "args": { + "External id": 295626,"Record function id": 0, "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368255037.571, "dur": 1.992, + "args": { + "External id": 295627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255038.013, "dur": 1.149, + "args": { + "External id": 295628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368255038.445, "dur": 0.624, + "args": { + "External id": 295629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368255043.910, "dur": 36697.690, + "args": { + "External id": 295630,"Record function id": 0, "Sequence number": 1209195, "Fwd thread id": 1, "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368255045.432, "dur": 36686.988, + "args": { + "External id": 295631,"Sequence number": 1209195, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2254 + } + }, + { + "ph": "f", "id": 37, "pid": 2070547, "tid": 2107622, "ts": 5333368255045.432, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2070547, "tid": 2107622, + "ts": 5333368255074.100, "dur": 35.730, + "args": { + "External id": 295632,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2070547, "tid": 2107622, + "ts": 5333368255117.231, "dur": 76.634, + "args": { + "External id": 295633,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2070547, "tid": 2107622, + "ts": 5333368255201.309, "dur": 36522.761, + "args": { + "External id": 295634,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368255291.426, "dur": 6.690, + "args": { + "External id": 295635,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368255308.147, "dur": 4.909, + "args": { + "External id": 295636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368255327.309, "dur": 35580.109, + "args": { + "External id": 295637,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368255339.710, "dur": 35558.565, + "args": { + "External id": 295638,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368255414.454, "dur": 13.994, + "args": { + "External id": 295639,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368255434.361, "dur": 35425.927, + "args": { + "External id": 295640,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368255437.217, "dur": 35422.369, + "args": { + "External id": 295641,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368255440.928, "dur": 4.711, + "args": { + "External id": 295642,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368255447.241, "dur": 35408.892, + "args": { + "External id": 295643,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368290996.072, "dur": 9.456, + "args": { + "External id": 295644,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368290999.061, "dur": 6.067, + "args": { + "External id": 295645,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368291033.899, "dur": 371.821, + "args": { + "External id": 295646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368291059.232, "dur": 341.317, + "args": { + "External id": 295647,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2270, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368291069.523, "dur": 325.157, + "args": { + "External id": 295648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368291424.836, "dur": 2.155, + "args": { + "External id": 295649,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2272, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291486.604, "dur": 6.085, + "args": { + "External id": 295650,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291535.969, "dur": 1.241, + "args": { + "External id": 295651,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291552.102, "dur": 1.202, + "args": { + "External id": 295652,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291564.925, "dur": 0.678, + "args": { + "External id": 295653,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291576.113, "dur": 1.125, + "args": { + "External id": 295654,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291587.972, "dur": 0.797, + "args": { + "External id": 295655,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291599.411, "dur": 0.845, + "args": { + "External id": 295656,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291611.112, "dur": 1.716, + "args": { + "External id": 295657,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291655.504, "dur": 1.463, + "args": { + "External id": 295658,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368291759.165, "dur": 2618.964, + "args": { + "External id": 295659,"Record function id": 0, "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2070547, "tid": 2107622, + "ts": 5333368291778.334, "dur": 977.907, + "args": { + "External id": 295660,"Record function id": 0, "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070547, "tid": 2107622, + "ts": 5333368291792.356, "dur": 298.671, + "args": { + "External id": 295661,"Record function id": 0, "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291869.308, "dur": 4.257, + "args": { + "External id": 295662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291876.795, "dur": 0.745, + "args": { + "External id": 295663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291879.324, "dur": 0.815, + "args": { + "External id": 295664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291882.197, "dur": 0.785, + "args": { + "External id": 295665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291884.650, "dur": 0.874, + "args": { + "External id": 295666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291887.177, "dur": 0.897, + "args": { + "External id": 295667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291889.700, "dur": 1.586, + "args": { + "External id": 295668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291892.650, "dur": 2.223, + "args": { + "External id": 295669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291896.279, "dur": 0.673, + "args": { + "External id": 295670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368291898.293, "dur": 0.606, + "args": { + "External id": 295671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368291916.499, "dur": 147.711, + "args": { + "External id": 295672,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368291931.531, "dur": 128.453, + "args": { + "External id": 295673,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368291944.090, "dur": 12.683, + "args": { + "External id": 295674,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368291960.914, "dur": 70.125, + "args": { + "External id": 295675,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368291963.528, "dur": 67.220, + "args": { + "External id": 295676,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368291966.677, "dur": 5.613, + "args": { + "External id": 295677,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368291978.886, "dur": 51.099, + "args": { + "External id": 295678,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2070547, "tid": 2107622, + "ts": 5333368292165.004, "dur": 582.730, + "args": { + "External id": 295679,"Record function id": 0, "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070547, "tid": 2107622, + "ts": 5333368292200.089, "dur": 533.719, + "args": { + "External id": 295680,"Record function id": 0, "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368292255.567, "dur": 5.915, + "args": { + "External id": 295681,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368292278.091, "dur": 24.628, + "args": { + "External id": 295682,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292283.083, "dur": 1.654, + "args": { + "External id": 295683,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292286.557, "dur": 0.435, + "args": { + "External id": 295684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292288.488, "dur": 2.822, + "args": { + "External id": 295685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292292.211, "dur": 0.358, + "args": { + "External id": 295686,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292293.837, "dur": 0.284, + "args": { + "External id": 295687,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292295.094, "dur": 0.383, + "args": { + "External id": 295688,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292296.276, "dur": 0.452, + "args": { + "External id": 295689,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292297.534, "dur": 0.336, + "args": { + "External id": 295690,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292298.701, "dur": 0.299, + "args": { + "External id": 295691,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368292312.460, "dur": 31.794, + "args": { + "External id": 295692,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368292373.419, "dur": 86.295, + "args": { + "External id": 295693,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368292383.037, "dur": 3.140, + "args": { + "External id": 295694,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368292390.723, "dur": 10.699, + "args": { + "External id": 295695,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368292394.410, "dur": 6.597, + "args": { + "External id": 295696,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292397.552, "dur": 2.311, + "args": { + "External id": 295697,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368292407.617, "dur": 18.313, + "args": { + "External id": 295698,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292409.503, "dur": 0.502, + "args": { + "External id": 295699,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292411.073, "dur": 0.801, + "args": { + "External id": 295700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292412.980, "dur": 0.603, + "args": { + "External id": 295701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292414.669, "dur": 0.353, + "args": { + "External id": 295702,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292415.808, "dur": 0.374, + "args": { + "External id": 295703,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292416.972, "dur": 0.455, + "args": { + "External id": 295704,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292418.355, "dur": 0.407, + "args": { + "External id": 295705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292419.530, "dur": 2.218, + "args": { + "External id": 295706,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368292422.498, "dur": 0.266, + "args": { + "External id": 295707,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368292434.777, "dur": 17.533, + "args": { + "External id": 295708,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368292501.342, "dur": 107.936, + "args": { + "External id": 295709,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368292522.112, "dur": 83.805, + "args": { + "External id": 295710,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2333, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368292530.995, "dur": 71.065, + "args": { + "External id": 295711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368292673.858, "dur": 3.350, + "args": { + "External id": 295712,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2335, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368292763.539, "dur": 1592.859, + "args": { + "External id": 295713,"Sequence number": 1209194, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2336 + } + }, + { + "ph": "f", "id": 38, "pid": 2070547, "tid": 2107622, "ts": 5333368292763.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368292869.672, "dur": 106.881, + "args": { + "External id": 295714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368293012.690, "dur": 37.387, + "args": { + "External id": 295715,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293067.580, "dur": 48.084, + "args": { + "External id": 295716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293124.762, "dur": 32.097, + "args": { + "External id": 295717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293162.786, "dur": 66.324, + "args": { + "External id": 295718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293238.752, "dur": 27.698, + "args": { + "External id": 295719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293275.264, "dur": 41.311, + "args": { + "External id": 295720,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368293340.027, "dur": 22.423, + "args": { + "External id": 295721,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368293378.607, "dur": 27.032, + "args": { + "External id": 295722,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368293423.302, "dur": 18.686, + "args": { + "External id": 295723,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368293452.099, "dur": 20.955, + "args": { + "External id": 295724,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293482.224, "dur": 31.486, + "args": { + "External id": 295725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293516.596, "dur": 31.197, + "args": { + "External id": 295726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368293576.707, "dur": 206.692, + "args": { + "External id": 295727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368293691.889, "dur": 7.924, + "args": { + "External id": 295728,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368293701.959, "dur": 2.367, + "args": { + "External id": 295729,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368293817.383, "dur": 25.711, + "args": { + "External id": 295730,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368293856.799, "dur": 14.764, + "args": { + "External id": 295731,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293880.652, "dur": 43.597, + "args": { + "External id": 295732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293929.966, "dur": 34.338, + "args": { + "External id": 295733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368293970.646, "dur": 25.192, + "args": { + "External id": 295734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368294000.122, "dur": 28.317, + "args": { + "External id": 295735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368294035.622, "dur": 19.601, + "args": { + "External id": 295736,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368294061.731, "dur": 28.732, + "args": { + "External id": 295737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368294120.774, "dur": 25.634, + "args": { + "External id": 295738,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368294161.358, "dur": 59.515, + "args": { + "External id": 295739,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368294246.880, "dur": 19.502, + "args": { + "External id": 295740,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368294282.038, "dur": 14.470, + "args": { + "External id": 295741,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368294308.625, "dur": 14.912, + "args": { + "External id": 295742,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294403.108, "dur": 15.047, + "args": { + "External id": 295743,"Record function id": 0, "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294406.172, "dur": 10.932, + "args": { + "External id": 295744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294410.459, "dur": 5.792, + "args": { + "External id": 295745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294411.956, "dur": 4.214, + "args": { + "External id": 295746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294422.090, "dur": 3.989, + "args": { + "External id": 295747,"Record function id": 0, "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294423.160, "dur": 2.482, + "args": { + "External id": 295748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294423.883, "dur": 1.269, + "args": { + "External id": 295749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294424.190, "dur": 0.881, + "args": { + "External id": 295750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294429.252, "dur": 4.865, + "args": { + "External id": 295751,"Record function id": 0, "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294430.663, "dur": 3.073, + "args": { + "External id": 295752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294431.326, "dur": 1.921, + "args": { + "External id": 295753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294432.156, "dur": 1.015, + "args": { + "External id": 295754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294437.343, "dur": 4.300, + "args": { + "External id": 295755,"Record function id": 0, "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294439.015, "dur": 2.252, + "args": { + "External id": 295756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294439.711, "dur": 1.154, + "args": { + "External id": 295757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294440.005, "dur": 0.793, + "args": { + "External id": 295758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294444.744, "dur": 4.120, + "args": { + "External id": 295759,"Record function id": 0, "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294446.061, "dur": 2.392, + "args": { + "External id": 295760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294446.562, "dur": 1.440, + "args": { + "External id": 295761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294447.229, "dur": 0.706, + "args": { + "External id": 295762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294452.056, "dur": 3.544, + "args": { + "External id": 295763,"Record function id": 0, "Ev Idx": 2386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294452.990, "dur": 2.228, + "args": { + "External id": 295764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294453.585, "dur": 1.227, + "args": { + "External id": 295765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294454.101, "dur": 0.649, + "args": { + "External id": 295766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294461.598, "dur": 5.081, + "args": { + "External id": 295767,"Record function id": 0, "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294462.692, "dur": 3.581, + "args": { + "External id": 295768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294463.160, "dur": 2.705, + "args": { + "External id": 295769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294463.528, "dur": 2.276, + "args": { + "External id": 295770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294469.761, "dur": 3.668, + "args": { + "External id": 295771,"Record function id": 0, "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294470.682, "dur": 2.371, + "args": { + "External id": 295772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294471.196, "dur": 1.396, + "args": { + "External id": 295773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294471.689, "dur": 0.829, + "args": { + "External id": 295774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294476.478, "dur": 3.403, + "args": { + "External id": 295775,"Record function id": 0, "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368294477.500, "dur": 1.986, + "args": { + "External id": 295776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294477.995, "dur": 1.076, + "args": { + "External id": 295777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368294478.361, "dur": 0.636, + "args": { + "External id": 295778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368294483.498, "dur": 37004.220, + "args": { + "External id": 295779,"Record function id": 0, "Sequence number": 1209193, "Fwd thread id": 1, "Ev Idx": 2402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368294485.245, "dur": 36993.746, + "args": { + "External id": 295780,"Sequence number": 1209193, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2403 + } + }, + { + "ph": "f", "id": 39, "pid": 2070547, "tid": 2107622, "ts": 5333368294485.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2070547, "tid": 2107622, + "ts": 5333368294514.201, "dur": 37.807, + "args": { + "External id": 295781,"Record function id": 0, "Ev Idx": 2404 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2070547, "tid": 2107622, + "ts": 5333368294559.201, "dur": 56.689, + "args": { + "External id": 295782,"Record function id": 0, "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2070547, "tid": 2107622, + "ts": 5333368294660.541, "dur": 36810.591, + "args": { + "External id": 295783,"Record function id": 0, "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368294751.020, "dur": 6.987, + "args": { + "External id": 295784,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368294768.182, "dur": 4.701, + "args": { + "External id": 295785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368294787.356, "dur": 35810.154, + "args": { + "External id": 295786,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368294800.712, "dur": 35787.346, + "args": { + "External id": 295787,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368294851.966, "dur": 13.645, + "args": { + "External id": 295788,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368294871.685, "dur": 35680.814, + "args": { + "External id": 295789,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368294874.083, "dur": 35677.571, + "args": { + "External id": 295790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368294877.603, "dur": 5.155, + "args": { + "External id": 295791,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368294884.327, "dur": 35663.546, + "args": { + "External id": 295792,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368330712.813, "dur": 9.706, + "args": { + "External id": 295793,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368330715.632, "dur": 6.175, + "args": { + "External id": 295794,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368330753.620, "dur": 434.221, + "args": { + "External id": 295795,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368330779.194, "dur": 402.355, + "args": { + "External id": 295796,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2419, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368330789.916, "dur": 371.962, + "args": { + "External id": 295797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368331210.929, "dur": 2.557, + "args": { + "External id": 295798,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2421, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331276.196, "dur": 8.567, + "args": { + "External id": 295799,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331326.684, "dur": 1.033, + "args": { + "External id": 295800,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331342.020, "dur": 1.576, + "args": { + "External id": 295801,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331354.884, "dur": 0.995, + "args": { + "External id": 295802,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331365.892, "dur": 2.511, + "args": { + "External id": 295803,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331377.009, "dur": 1.082, + "args": { + "External id": 295804,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331388.007, "dur": 0.945, + "args": { + "External id": 295805,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331399.082, "dur": 1.796, + "args": { + "External id": 295806,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331410.087, "dur": 2.544, + "args": { + "External id": 295807,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368331502.845, "dur": 2589.740, + "args": { + "External id": 295808,"Record function id": 0, "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2070547, "tid": 2107622, + "ts": 5333368331522.257, "dur": 965.621, + "args": { + "External id": 295809,"Record function id": 0, "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070547, "tid": 2107622, + "ts": 5333368331537.286, "dur": 344.154, + "args": { + "External id": 295810,"Record function id": 0, "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331613.065, "dur": 4.216, + "args": { + "External id": 295811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331667.892, "dur": 1.699, + "args": { + "External id": 295812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331672.042, "dur": 0.696, + "args": { + "External id": 295813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331674.245, "dur": 0.608, + "args": { + "External id": 295814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331676.591, "dur": 0.675, + "args": { + "External id": 295815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331678.571, "dur": 0.829, + "args": { + "External id": 295816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331680.758, "dur": 3.349, + "args": { + "External id": 295817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331685.518, "dur": 0.728, + "args": { + "External id": 295818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331687.890, "dur": 0.774, + "args": { + "External id": 295819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368331689.904, "dur": 0.662, + "args": { + "External id": 295820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368331709.777, "dur": 142.315, + "args": { + "External id": 295821,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368331725.315, "dur": 122.106, + "args": { + "External id": 295822,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368331738.745, "dur": 12.651, + "args": { + "External id": 295823,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368331755.399, "dur": 65.373, + "args": { + "External id": 295824,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368331758.015, "dur": 62.486, + "args": { + "External id": 295825,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368331761.234, "dur": 5.879, + "args": { + "External id": 295826,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368331768.811, "dur": 51.099, + "args": { + "External id": 295827,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2070547, "tid": 2107622, + "ts": 5333368331957.388, "dur": 523.212, + "args": { + "External id": 295828,"Record function id": 0, "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070547, "tid": 2107622, + "ts": 5333368331972.965, "dur": 495.388, + "args": { + "External id": 295829,"Record function id": 0, "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368332027.237, "dur": 4.572, + "args": { + "External id": 295830,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368332047.101, "dur": 24.244, + "args": { + "External id": 295831,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332051.411, "dur": 1.330, + "args": { + "External id": 295832,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332054.798, "dur": 3.108, + "args": { + "External id": 295833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332059.057, "dur": 0.341, + "args": { + "External id": 295834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332060.409, "dur": 0.621, + "args": { + "External id": 295835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332062.159, "dur": 0.357, + "args": { + "External id": 295836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332063.703, "dur": 0.350, + "args": { + "External id": 295837,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332064.759, "dur": 0.460, + "args": { + "External id": 295838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332065.965, "dur": 0.474, + "args": { + "External id": 295839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332067.252, "dur": 0.361, + "args": { + "External id": 295840,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368332080.149, "dur": 28.155, + "args": { + "External id": 295841,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368332138.128, "dur": 114.968, + "args": { + "External id": 295842,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368332147.249, "dur": 4.292, + "args": { + "External id": 295843,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368332156.079, "dur": 24.414, + "args": { + "External id": 295844,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368332160.233, "dur": 4.730, + "args": { + "External id": 295845,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332163.169, "dur": 0.654, + "args": { + "External id": 295846,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368332190.211, "dur": 21.721, + "args": { + "External id": 295847,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332192.257, "dur": 0.715, + "args": { + "External id": 295848,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332194.441, "dur": 0.686, + "args": { + "External id": 295849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332196.214, "dur": 0.567, + "args": { + "External id": 295850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332197.600, "dur": 0.258, + "args": { + "External id": 295851,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332198.962, "dur": 0.378, + "args": { + "External id": 295852,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332200.709, "dur": 0.381, + "args": { + "External id": 295853,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332203.191, "dur": 2.379, + "args": { + "External id": 295854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332206.476, "dur": 0.360, + "args": { + "External id": 295855,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368332208.318, "dur": 0.365, + "args": { + "External id": 295856,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368332222.647, "dur": 22.809, + "args": { + "External id": 295857,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368332296.431, "dur": 110.653, + "args": { + "External id": 295858,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368332318.899, "dur": 85.184, + "args": { + "External id": 295859,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2482, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368332327.894, "dur": 72.177, + "args": { + "External id": 295860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368332420.307, "dur": 1.906, + "args": { + "External id": 295861,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2484, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368332495.695, "dur": 1575.632, + "args": { + "External id": 295862,"Sequence number": 1209192, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2485 + } + }, + { + "ph": "f", "id": 40, "pid": 2070547, "tid": 2107622, "ts": 5333368332495.695, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368332598.543, "dur": 142.909, + "args": { + "External id": 295863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368332784.449, "dur": 38.120, + "args": { + "External id": 295864,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368332840.549, "dur": 49.674, + "args": { + "External id": 295865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368332900.639, "dur": 32.170, + "args": { + "External id": 295866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368332938.629, "dur": 44.209, + "args": { + "External id": 295867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368332990.938, "dur": 27.144, + "args": { + "External id": 295868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333025.246, "dur": 41.114, + "args": { + "External id": 295869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368333085.952, "dur": 21.734, + "args": { + "External id": 295870,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368333124.587, "dur": 27.821, + "args": { + "External id": 295871,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368333186.931, "dur": 20.760, + "args": { + "External id": 295872,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368333221.776, "dur": 18.217, + "args": { + "External id": 295873,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333249.767, "dur": 33.598, + "args": { + "External id": 295874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333286.865, "dur": 35.807, + "args": { + "External id": 295875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368333348.326, "dur": 171.526, + "args": { + "External id": 295876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368333426.434, "dur": 5.906, + "args": { + "External id": 295877,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368333437.441, "dur": 4.807, + "args": { + "External id": 295878,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368333550.938, "dur": 24.230, + "args": { + "External id": 295879,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368333586.368, "dur": 17.591, + "args": { + "External id": 295880,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333611.128, "dur": 75.726, + "args": { + "External id": 295881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333695.835, "dur": 35.561, + "args": { + "External id": 295882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333738.329, "dur": 19.954, + "args": { + "External id": 295883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333764.681, "dur": 29.063, + "args": { + "External id": 295884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333799.011, "dur": 19.053, + "args": { + "External id": 295885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368333824.528, "dur": 30.482, + "args": { + "External id": 295886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368333873.363, "dur": 21.271, + "args": { + "External id": 295887,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368333911.401, "dur": 21.389, + "args": { + "External id": 295888,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368333946.515, "dur": 15.468, + "args": { + "External id": 295889,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368333992.639, "dur": 20.784, + "args": { + "External id": 295890,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368334026.983, "dur": 15.167, + "args": { + "External id": 295891,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334118.259, "dur": 15.195, + "args": { + "External id": 295892,"Record function id": 0, "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334121.728, "dur": 10.761, + "args": { + "External id": 295893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334125.741, "dur": 5.884, + "args": { + "External id": 295894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334127.412, "dur": 4.133, + "args": { + "External id": 295895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334137.209, "dur": 4.737, + "args": { + "External id": 295896,"Record function id": 0, "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334138.822, "dur": 2.688, + "args": { + "External id": 295897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334139.715, "dur": 1.330, + "args": { + "External id": 295898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334140.276, "dur": 0.702, + "args": { + "External id": 295899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334145.425, "dur": 3.958, + "args": { + "External id": 295900,"Record function id": 0, "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334146.522, "dur": 2.452, + "args": { + "External id": 295901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334147.037, "dur": 1.492, + "args": { + "External id": 295902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334147.450, "dur": 0.997, + "args": { + "External id": 295903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334152.634, "dur": 6.947, + "args": { + "External id": 295904,"Record function id": 0, "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334154.030, "dur": 5.138, + "args": { + "External id": 295905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334157.720, "dur": 1.015, + "args": { + "External id": 295906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334158.037, "dur": 0.631, + "args": { + "External id": 295907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334162.760, "dur": 22.620, + "args": { + "External id": 295908,"Record function id": 0, "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334163.996, "dur": 20.292, + "args": { + "External id": 295909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334164.466, "dur": 18.810, + "args": { + "External id": 295910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334165.091, "dur": 17.752, + "args": { + "External id": 295911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334190.892, "dur": 20.666, + "args": { + "External id": 295912,"Record function id": 0, "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334207.712, "dur": 3.406, + "args": { + "External id": 295913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334208.962, "dur": 1.608, + "args": { + "External id": 295914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334209.692, "dur": 0.813, + "args": { + "External id": 295915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334217.938, "dur": 4.442, + "args": { + "External id": 295916,"Record function id": 0, "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334219.341, "dur": 2.559, + "args": { + "External id": 295917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334220.167, "dur": 1.298, + "args": { + "External id": 295918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334220.831, "dur": 0.567, + "args": { + "External id": 295919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334225.610, "dur": 4.502, + "args": { + "External id": 295920,"Record function id": 0, "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334226.723, "dur": 2.945, + "args": { + "External id": 295921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334227.440, "dur": 1.817, + "args": { + "External id": 295922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334228.472, "dur": 0.682, + "args": { + "External id": 295923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334233.208, "dur": 4.064, + "args": { + "External id": 295924,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368334234.309, "dur": 2.554, + "args": { + "External id": 295925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334234.828, "dur": 1.597, + "args": { + "External id": 295926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368334235.403, "dur": 0.959, + "args": { + "External id": 295927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368334241.149, "dur": 37748.132, + "args": { + "External id": 295928,"Record function id": 0, "Sequence number": 1209191, "Fwd thread id": 1, "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368334242.349, "dur": 37738.583, + "args": { + "External id": 295929,"Sequence number": 1209191, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2552 + } + }, + { + "ph": "f", "id": 41, "pid": 2070547, "tid": 2107622, "ts": 5333368334242.349, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2070547, "tid": 2107622, + "ts": 5333368334272.661, "dur": 39.864, + "args": { + "External id": 295930,"Record function id": 0, "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2070547, "tid": 2107622, + "ts": 5333368334320.037, "dur": 60.010, + "args": { + "External id": 295931,"Record function id": 0, "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2070547, "tid": 2107622, + "ts": 5333368334385.638, "dur": 37587.868, + "args": { + "External id": 295932,"Record function id": 0, "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368334473.668, "dur": 7.320, + "args": { + "External id": 295933,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368334490.844, "dur": 4.659, + "args": { + "External id": 295934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368334508.616, "dur": 36634.403, + "args": { + "External id": 295935,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368334521.879, "dur": 36611.924, + "args": { + "External id": 295936,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368334567.607, "dur": 14.332, + "args": { + "External id": 295937,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368334587.908, "dur": 36509.583, + "args": { + "External id": 295938,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368334590.604, "dur": 36506.242, + "args": { + "External id": 295939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368334594.178, "dur": 7.040, + "args": { + "External id": 295940,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368334603.063, "dur": 36489.970, + "args": { + "External id": 295941,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368371246.978, "dur": 9.479, + "args": { + "External id": 295942,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368371249.796, "dur": 6.140, + "args": { + "External id": 295943,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368371283.249, "dur": 405.265, + "args": { + "External id": 295944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368371309.456, "dur": 373.896, + "args": { + "External id": 295945,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2568, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368371319.736, "dur": 357.403, + "args": { + "External id": 295946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368371709.935, "dur": 2.220, + "args": { + "External id": 295947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2570, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371775.981, "dur": 6.179, + "args": { + "External id": 295948,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371824.687, "dur": 1.257, + "args": { + "External id": 295949,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371840.469, "dur": 1.212, + "args": { + "External id": 295950,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371852.307, "dur": 2.631, + "args": { + "External id": 295951,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371866.313, "dur": 0.880, + "args": { + "External id": 295952,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371876.898, "dur": 0.699, + "args": { + "External id": 295953,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371887.145, "dur": 0.700, + "args": { + "External id": 295954,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371898.328, "dur": 3.467, + "args": { + "External id": 295955,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368371911.723, "dur": 0.690, + "args": { + "External id": 295956,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368372003.239, "dur": 2673.148, + "args": { + "External id": 295957,"Record function id": 0, "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2070547, "tid": 2107622, + "ts": 5333368372022.550, "dur": 995.925, + "args": { + "External id": 295958,"Record function id": 0, "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070547, "tid": 2107622, + "ts": 5333368372037.960, "dur": 338.112, + "args": { + "External id": 295959,"Record function id": 0, "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372114.197, "dur": 3.970, + "args": { + "External id": 295960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372121.096, "dur": 0.706, + "args": { + "External id": 295961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372123.814, "dur": 0.763, + "args": { + "External id": 295962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372126.151, "dur": 0.673, + "args": { + "External id": 295963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372128.209, "dur": 2.220, + "args": { + "External id": 295964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372131.836, "dur": 0.812, + "args": { + "External id": 295965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372134.327, "dur": 1.308, + "args": { + "External id": 295966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372137.075, "dur": 0.638, + "args": { + "External id": 295967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372139.298, "dur": 0.645, + "args": { + "External id": 295968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368372141.663, "dur": 0.576, + "args": { + "External id": 295969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368372160.323, "dur": 184.117, + "args": { + "External id": 295970,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368372193.732, "dur": 145.603, + "args": { + "External id": 295971,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368372227.035, "dur": 13.699, + "args": { + "External id": 295972,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368372244.800, "dur": 65.290, + "args": { + "External id": 295973,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368372247.081, "dur": 62.723, + "args": { + "External id": 295974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372250.615, "dur": 6.081, + "args": { + "External id": 295975,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368372258.185, "dur": 51.056, + "args": { + "External id": 295976,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2070547, "tid": 2107622, + "ts": 5333368372454.724, "dur": 555.800, + "args": { + "External id": 295977,"Record function id": 0, "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070547, "tid": 2107622, + "ts": 5333368372470.282, "dur": 527.002, + "args": { + "External id": 295978,"Record function id": 0, "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368372526.103, "dur": 6.882, + "args": { + "External id": 295979,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368372548.222, "dur": 25.280, + "args": { + "External id": 295980,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372552.701, "dur": 1.617, + "args": { + "External id": 295981,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372555.962, "dur": 0.792, + "args": { + "External id": 295982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372557.553, "dur": 0.615, + "args": { + "External id": 295983,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372559.665, "dur": 0.602, + "args": { + "External id": 295984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372561.261, "dur": 0.898, + "args": { + "External id": 295985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372563.658, "dur": 0.367, + "args": { + "External id": 295986,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372565.118, "dur": 0.361, + "args": { + "External id": 295987,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372566.778, "dur": 1.487, + "args": { + "External id": 295988,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372569.053, "dur": 0.562, + "args": { + "External id": 295989,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368372583.193, "dur": 29.617, + "args": { + "External id": 295990,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368372679.686, "dur": 100.149, + "args": { + "External id": 295991,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368372690.697, "dur": 4.526, + "args": { + "External id": 295992,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368372701.268, "dur": 9.750, + "args": { + "External id": 295993,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368372705.277, "dur": 5.291, + "args": { + "External id": 295994,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372708.155, "dur": 0.916, + "args": { + "External id": 295995,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368372717.611, "dur": 21.723, + "args": { + "External id": 295996,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372719.457, "dur": 0.402, + "args": { + "External id": 295997,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372720.963, "dur": 0.529, + "args": { + "External id": 295998,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372723.193, "dur": 0.580, + "args": { + "External id": 295999,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372725.132, "dur": 0.538, + "args": { + "External id": 296000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372727.136, "dur": 2.033, + "args": { + "External id": 296001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372730.450, "dur": 0.348, + "args": { + "External id": 296002,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372732.048, "dur": 0.440, + "args": { + "External id": 296003,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372733.956, "dur": 0.379, + "args": { + "External id": 296004,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368372735.771, "dur": 0.381, + "args": { + "External id": 296005,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368372750.058, "dur": 22.502, + "args": { + "External id": 296006,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368372823.759, "dur": 111.505, + "args": { + "External id": 296007,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368372848.463, "dur": 83.623, + "args": { + "External id": 296008,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2631, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368372857.729, "dur": 70.117, + "args": { + "External id": 296009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368372948.689, "dur": 1.734, + "args": { + "External id": 296010,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2633, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368373024.992, "dur": 1580.827, + "args": { + "External id": 296011,"Sequence number": 1209190, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2634 + } + }, + { + "ph": "f", "id": 42, "pid": 2070547, "tid": 2107622, "ts": 5333368373024.992, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373129.151, "dur": 123.517, + "args": { + "External id": 296012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368373296.273, "dur": 38.935, + "args": { + "External id": 296013,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373351.654, "dur": 49.859, + "args": { + "External id": 296014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373411.288, "dur": 32.441, + "args": { + "External id": 296015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373449.539, "dur": 45.542, + "args": { + "External id": 296016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373503.685, "dur": 28.027, + "args": { + "External id": 296017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373538.643, "dur": 40.517, + "args": { + "External id": 296018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368373600.897, "dur": 71.250, + "args": { + "External id": 296019,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368373693.231, "dur": 29.398, + "args": { + "External id": 296020,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368373745.335, "dur": 19.573, + "args": { + "External id": 296021,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368373778.719, "dur": 14.302, + "args": { + "External id": 296022,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373803.365, "dur": 32.780, + "args": { + "External id": 296023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368373839.251, "dur": 33.516, + "args": { + "External id": 296024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368373899.639, "dur": 165.922, + "args": { + "External id": 296025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368373973.563, "dur": 6.431, + "args": { + "External id": 296026,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368373982.000, "dur": 2.696, + "args": { + "External id": 296027,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368374095.107, "dur": 24.199, + "args": { + "External id": 296028,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368374131.112, "dur": 14.236, + "args": { + "External id": 296029,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368374154.682, "dur": 52.499, + "args": { + "External id": 296030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368374215.780, "dur": 38.463, + "args": { + "External id": 296031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368374261.199, "dur": 19.662, + "args": { + "External id": 296032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368374285.435, "dur": 29.472, + "args": { + "External id": 296033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368374322.693, "dur": 19.265, + "args": { + "External id": 296034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368374347.996, "dur": 28.483, + "args": { + "External id": 296035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368374395.601, "dur": 22.603, + "args": { + "External id": 296036,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368374436.401, "dur": 41.024, + "args": { + "External id": 296037,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368374497.794, "dur": 18.739, + "args": { + "External id": 296038,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368374530.776, "dur": 14.692, + "args": { + "External id": 296039,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368374559.090, "dur": 18.891, + "args": { + "External id": 296040,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374700.840, "dur": 15.648, + "args": { + "External id": 296041,"Record function id": 0, "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374704.211, "dur": 11.121, + "args": { + "External id": 296042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374708.211, "dur": 5.656, + "args": { + "External id": 296043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374709.664, "dur": 4.107, + "args": { + "External id": 296044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374720.405, "dur": 5.077, + "args": { + "External id": 296045,"Record function id": 0, "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374721.926, "dur": 3.103, + "args": { + "External id": 296046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374722.669, "dur": 1.897, + "args": { + "External id": 296047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374723.451, "dur": 1.047, + "args": { + "External id": 296048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374728.739, "dur": 5.763, + "args": { + "External id": 296049,"Record function id": 0, "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374729.815, "dur": 4.244, + "args": { + "External id": 296050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374730.422, "dur": 3.204, + "args": { + "External id": 296051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374730.951, "dur": 2.593, + "args": { + "External id": 296052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374737.637, "dur": 4.253, + "args": { + "External id": 296053,"Record function id": 0, "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374739.226, "dur": 2.267, + "args": { + "External id": 296054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374739.757, "dur": 1.312, + "args": { + "External id": 296055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374740.211, "dur": 0.793, + "args": { + "External id": 296056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374745.084, "dur": 4.282, + "args": { + "External id": 296057,"Record function id": 0, "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374746.108, "dur": 2.811, + "args": { + "External id": 296058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374746.766, "dur": 1.657, + "args": { + "External id": 296059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374747.681, "dur": 0.666, + "args": { + "External id": 296060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374752.583, "dur": 4.259, + "args": { + "External id": 296061,"Record function id": 0, "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374753.682, "dur": 2.746, + "args": { + "External id": 296062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374754.172, "dur": 1.783, + "args": { + "External id": 296063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374755.146, "dur": 0.740, + "args": { + "External id": 296064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374759.944, "dur": 4.411, + "args": { + "External id": 296065,"Record function id": 0, "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374761.234, "dur": 2.700, + "args": { + "External id": 296066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374762.155, "dur": 1.362, + "args": { + "External id": 296067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374762.742, "dur": 0.708, + "args": { + "External id": 296068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374767.450, "dur": 4.198, + "args": { + "External id": 296069,"Record function id": 0, "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374768.549, "dur": 2.671, + "args": { + "External id": 296070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374769.171, "dur": 1.622, + "args": { + "External id": 296071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374769.980, "dur": 0.749, + "args": { + "External id": 296072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374775.208, "dur": 4.117, + "args": { + "External id": 296073,"Record function id": 0, "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368374776.508, "dur": 2.410, + "args": { + "External id": 296074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374777.341, "dur": 1.155, + "args": { + "External id": 296075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368374777.737, "dur": 0.684, + "args": { + "External id": 296076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368374783.487, "dur": 37230.342, + "args": { + "External id": 296077,"Record function id": 0, "Sequence number": 1209189, "Fwd thread id": 1, "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368374784.859, "dur": 37220.395, + "args": { + "External id": 296078,"Sequence number": 1209189, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2701 + } + }, + { + "ph": "f", "id": 43, "pid": 2070547, "tid": 2107622, "ts": 5333368374784.859, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2070547, "tid": 2107622, + "ts": 5333368374814.706, "dur": 37.289, + "args": { + "External id": 296079,"Record function id": 0, "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2070547, "tid": 2107622, + "ts": 5333368374859.963, "dur": 61.376, + "args": { + "External id": 296080,"Record function id": 0, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2070547, "tid": 2107622, + "ts": 5333368374926.755, "dur": 37071.424, + "args": { + "External id": 296081,"Record function id": 0, "Ev Idx": 2704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368375016.352, "dur": 6.988, + "args": { + "External id": 296082,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368375032.399, "dur": 4.753, + "args": { + "External id": 296083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368375051.477, "dur": 36151.756, + "args": { + "External id": 296084,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368375078.519, "dur": 36115.065, + "args": { + "External id": 296085,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368375164.698, "dur": 31.744, + "args": { + "External id": 296086,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368375203.551, "dur": 35939.620, + "args": { + "External id": 296087,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368375206.563, "dur": 35936.008, + "args": { + "External id": 296088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368375213.486, "dur": 5.785, + "args": { + "External id": 296089,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368375221.116, "dur": 35918.169, + "args": { + "External id": 296090,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368411292.971, "dur": 8.886, + "args": { + "External id": 296091,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368411295.719, "dur": 5.789, + "args": { + "External id": 296092,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368411329.903, "dur": 389.297, + "args": { + "External id": 296093,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368411356.048, "dur": 358.072, + "args": { + "External id": 296094,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2717, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368411366.327, "dur": 341.851, + "args": { + "External id": 296095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368411739.843, "dur": 2.096, + "args": { + "External id": 296096,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2719, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411801.893, "dur": 6.584, + "args": { + "External id": 296097,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411851.035, "dur": 1.542, + "args": { + "External id": 296098,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411867.295, "dur": 1.370, + "args": { + "External id": 296099,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411880.033, "dur": 2.562, + "args": { + "External id": 296100,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411894.292, "dur": 0.743, + "args": { + "External id": 296101,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411903.938, "dur": 0.781, + "args": { + "External id": 296102,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411914.349, "dur": 0.724, + "args": { + "External id": 296103,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411925.483, "dur": 4.017, + "args": { + "External id": 296104,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368411938.594, "dur": 0.668, + "args": { + "External id": 296105,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368412027.570, "dur": 2575.937, + "args": { + "External id": 296106,"Record function id": 0, "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2070547, "tid": 2107622, + "ts": 5333368412046.360, "dur": 987.935, + "args": { + "External id": 296107,"Record function id": 0, "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070547, "tid": 2107622, + "ts": 5333368412059.660, "dur": 320.855, + "args": { + "External id": 296108,"Record function id": 0, "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412132.598, "dur": 4.104, + "args": { + "External id": 296109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412139.704, "dur": 0.785, + "args": { + "External id": 296110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412142.149, "dur": 0.705, + "args": { + "External id": 296111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412144.476, "dur": 0.542, + "args": { + "External id": 296112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412146.760, "dur": 0.628, + "args": { + "External id": 296113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412148.821, "dur": 2.553, + "args": { + "External id": 296114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412152.890, "dur": 1.415, + "args": { + "External id": 296115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412155.631, "dur": 0.639, + "args": { + "External id": 296116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412157.732, "dur": 0.614, + "args": { + "External id": 296117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368412160.218, "dur": 0.531, + "args": { + "External id": 296118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368412196.713, "dur": 153.801, + "args": { + "External id": 296119,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368412212.806, "dur": 132.904, + "args": { + "External id": 296120,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368412227.515, "dur": 12.481, + "args": { + "External id": 296121,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368412244.109, "dur": 72.863, + "args": { + "External id": 296122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368412246.449, "dur": 70.216, + "args": { + "External id": 296123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412249.691, "dur": 6.340, + "args": { + "External id": 296124,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368412257.703, "dur": 58.128, + "args": { + "External id": 296125,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2070547, "tid": 2107622, + "ts": 5333368412456.333, "dur": 570.164, + "args": { + "External id": 296126,"Record function id": 0, "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070547, "tid": 2107622, + "ts": 5333368412472.692, "dur": 540.457, + "args": { + "External id": 296127,"Record function id": 0, "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368412528.471, "dur": 5.624, + "args": { + "External id": 296128,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368412549.663, "dur": 24.565, + "args": { + "External id": 296129,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412553.943, "dur": 1.490, + "args": { + "External id": 296130,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412557.192, "dur": 2.427, + "args": { + "External id": 296131,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412560.575, "dur": 0.559, + "args": { + "External id": 296132,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412562.593, "dur": 0.364, + "args": { + "External id": 296133,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412564.110, "dur": 0.480, + "args": { + "External id": 296134,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412565.694, "dur": 0.398, + "args": { + "External id": 296135,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412567.010, "dur": 0.576, + "args": { + "External id": 296136,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412568.913, "dur": 0.271, + "args": { + "External id": 296137,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412569.880, "dur": 0.374, + "args": { + "External id": 296138,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368412583.941, "dur": 30.400, + "args": { + "External id": 296139,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368412685.556, "dur": 99.957, + "args": { + "External id": 296140,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368412696.355, "dur": 6.518, + "args": { + "External id": 296141,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368412707.935, "dur": 9.904, + "args": { + "External id": 296142,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368412711.912, "dur": 5.539, + "args": { + "External id": 296143,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412715.266, "dur": 0.682, + "args": { + "External id": 296144,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368412724.090, "dur": 20.699, + "args": { + "External id": 296145,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412725.735, "dur": 0.336, + "args": { + "External id": 296146,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412727.680, "dur": 0.812, + "args": { + "External id": 296147,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412729.910, "dur": 0.416, + "args": { + "External id": 296148,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412731.540, "dur": 0.615, + "args": { + "External id": 296149,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412733.461, "dur": 0.366, + "args": { + "External id": 296150,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412734.983, "dur": 0.811, + "args": { + "External id": 296151,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412736.890, "dur": 2.023, + "args": { + "External id": 296152,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412740.106, "dur": 0.330, + "args": { + "External id": 296153,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368412741.464, "dur": 0.385, + "args": { + "External id": 296154,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368412755.400, "dur": 22.701, + "args": { + "External id": 296155,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368412833.014, "dur": 112.688, + "args": { + "External id": 296156,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368412856.729, "dur": 85.849, + "args": { + "External id": 296157,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2780, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368412865.875, "dur": 72.261, + "args": { + "External id": 296158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368412960.121, "dur": 1.700, + "args": { + "External id": 296159,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2782, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368413041.317, "dur": 1538.831, + "args": { + "External id": 296160,"Sequence number": 1209188, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2783 + } + }, + { + "ph": "f", "id": 44, "pid": 2070547, "tid": 2107622, "ts": 5333368413041.317, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413146.928, "dur": 123.295, + "args": { + "External id": 296161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368413309.851, "dur": 37.726, + "args": { + "External id": 296162,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413364.047, "dur": 49.412, + "args": { + "External id": 296163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413423.803, "dur": 31.066, + "args": { + "External id": 296164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413460.894, "dur": 44.286, + "args": { + "External id": 296165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413511.502, "dur": 27.311, + "args": { + "External id": 296166,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413545.734, "dur": 40.666, + "args": { + "External id": 296167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368413607.577, "dur": 60.525, + "args": { + "External id": 296168,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368413689.745, "dur": 29.158, + "args": { + "External id": 296169,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368413740.981, "dur": 20.393, + "args": { + "External id": 296170,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368413773.155, "dur": 15.041, + "args": { + "External id": 296171,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413795.498, "dur": 33.500, + "args": { + "External id": 296172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368413832.434, "dur": 32.048, + "args": { + "External id": 296173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368413891.879, "dur": 166.977, + "args": { + "External id": 296174,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368413968.216, "dur": 6.108, + "args": { + "External id": 296175,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368413976.044, "dur": 2.566, + "args": { + "External id": 296176,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368414086.128, "dur": 22.666, + "args": { + "External id": 296177,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368414121.266, "dur": 15.585, + "args": { + "External id": 296178,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368414143.468, "dur": 48.864, + "args": { + "External id": 296179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368414200.588, "dur": 39.397, + "args": { + "External id": 296180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368414248.223, "dur": 21.391, + "args": { + "External id": 296181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368414273.918, "dur": 29.109, + "args": { + "External id": 296182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368414308.824, "dur": 21.525, + "args": { + "External id": 296183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368414335.944, "dur": 28.818, + "args": { + "External id": 296184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368414384.186, "dur": 21.627, + "args": { + "External id": 296185,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368414421.043, "dur": 23.896, + "args": { + "External id": 296186,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368414459.023, "dur": 18.332, + "args": { + "External id": 296187,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368414489.965, "dur": 14.031, + "args": { + "External id": 296188,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368414515.525, "dur": 30.455, + "args": { + "External id": 296189,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414676.141, "dur": 16.498, + "args": { + "External id": 296190,"Record function id": 0, "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414680.047, "dur": 11.315, + "args": { + "External id": 296191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414684.335, "dur": 5.865, + "args": { + "External id": 296192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414685.821, "dur": 4.132, + "args": { + "External id": 296193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414697.837, "dur": 4.003, + "args": { + "External id": 296194,"Record function id": 0, "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414698.879, "dur": 2.495, + "args": { + "External id": 296195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414699.963, "dur": 0.938, + "args": { + "External id": 296196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414700.288, "dur": 0.540, + "args": { + "External id": 296197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414705.329, "dur": 4.067, + "args": { + "External id": 296198,"Record function id": 0, "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414706.717, "dur": 2.235, + "args": { + "External id": 296199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414707.332, "dur": 1.219, + "args": { + "External id": 296200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414707.783, "dur": 0.684, + "args": { + "External id": 296201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414712.592, "dur": 3.498, + "args": { + "External id": 296202,"Record function id": 0, "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414713.865, "dur": 1.797, + "args": { + "External id": 296203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414714.339, "dur": 0.854, + "args": { + "External id": 296204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414714.636, "dur": 0.474, + "args": { + "External id": 296205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414719.042, "dur": 3.502, + "args": { + "External id": 296206,"Record function id": 0, "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414720.024, "dur": 2.094, + "args": { + "External id": 296207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414720.599, "dur": 1.092, + "args": { + "External id": 296208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414721.039, "dur": 0.584, + "args": { + "External id": 296209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414725.580, "dur": 3.763, + "args": { + "External id": 296210,"Record function id": 0, "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414726.788, "dur": 2.111, + "args": { + "External id": 296211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414727.234, "dur": 1.259, + "args": { + "External id": 296212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414727.662, "dur": 0.767, + "args": { + "External id": 296213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414735.405, "dur": 5.000, + "args": { + "External id": 296214,"Record function id": 0, "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414736.545, "dur": 3.421, + "args": { + "External id": 296215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414737.008, "dur": 2.536, + "args": { + "External id": 296216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414737.411, "dur": 2.064, + "args": { + "External id": 296217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414743.387, "dur": 3.529, + "args": { + "External id": 296218,"Record function id": 0, "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414744.525, "dur": 1.965, + "args": { + "External id": 296219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414744.978, "dur": 1.100, + "args": { + "External id": 296220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414745.386, "dur": 0.618, + "args": { + "External id": 296221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414749.905, "dur": 3.459, + "args": { + "External id": 296222,"Record function id": 0, "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368414750.886, "dur": 2.066, + "args": { + "External id": 296223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414751.391, "dur": 1.114, + "args": { + "External id": 296224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368414751.785, "dur": 0.646, + "args": { + "External id": 296225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368414757.321, "dur": 39302.377, + "args": { + "External id": 296226,"Record function id": 0, "Sequence number": 1209187, "Fwd thread id": 1, "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368414758.604, "dur": 39292.858, + "args": { + "External id": 296227,"Sequence number": 1209187, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2850 + } + }, + { + "ph": "f", "id": 45, "pid": 2070547, "tid": 2107622, "ts": 5333368414758.604, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2070547, "tid": 2107622, + "ts": 5333368414787.329, "dur": 35.670, + "args": { + "External id": 296228,"Record function id": 0, "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2070547, "tid": 2107622, + "ts": 5333368414830.899, "dur": 59.950, + "args": { + "External id": 296229,"Record function id": 0, "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2070547, "tid": 2107622, + "ts": 5333368414896.610, "dur": 39147.910, + "args": { + "External id": 296230,"Record function id": 0, "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368414981.614, "dur": 7.024, + "args": { + "External id": 296231,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368414997.896, "dur": 4.598, + "args": { + "External id": 296232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368415016.125, "dur": 38184.721, + "args": { + "External id": 296233,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368415033.764, "dur": 38157.837, + "args": { + "External id": 296234,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368415099.367, "dur": 14.387, + "args": { + "External id": 296235,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368415120.002, "dur": 38022.391, + "args": { + "External id": 296236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368415122.250, "dur": 38019.369, + "args": { + "External id": 296237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368415125.737, "dur": 5.188, + "args": { + "External id": 296238,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368415132.528, "dur": 38005.488, + "args": { + "External id": 296239,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368453291.451, "dur": 9.189, + "args": { + "External id": 296240,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368453294.302, "dur": 5.930, + "args": { + "External id": 296241,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368453330.069, "dur": 435.287, + "args": { + "External id": 296242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368453353.690, "dur": 406.045, + "args": { + "External id": 296243,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2866, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368453364.373, "dur": 389.365, + "args": { + "External id": 296244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368453785.582, "dur": 2.333, + "args": { + "External id": 296245,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2868, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453851.287, "dur": 6.690, + "args": { + "External id": 296246,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453900.388, "dur": 1.186, + "args": { + "External id": 296247,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453916.078, "dur": 2.086, + "args": { + "External id": 296248,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453928.946, "dur": 0.729, + "args": { + "External id": 296249,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453940.822, "dur": 0.730, + "args": { + "External id": 296250,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453950.744, "dur": 0.810, + "args": { + "External id": 296251,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453961.264, "dur": 1.985, + "args": { + "External id": 296252,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453973.329, "dur": 1.425, + "args": { + "External id": 296253,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368453983.633, "dur": 0.753, + "args": { + "External id": 296254,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368454073.763, "dur": 2591.003, + "args": { + "External id": 296255,"Record function id": 0, "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2070547, "tid": 2107622, + "ts": 5333368454092.946, "dur": 948.794, + "args": { + "External id": 296256,"Record function id": 0, "Ev Idx": 2879 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070547, "tid": 2107622, + "ts": 5333368454106.732, "dur": 314.233, + "args": { + "External id": 296257,"Record function id": 0, "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454200.367, "dur": 4.607, + "args": { + "External id": 296258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454208.310, "dur": 0.572, + "args": { + "External id": 296259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454210.641, "dur": 0.486, + "args": { + "External id": 296260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454212.974, "dur": 0.646, + "args": { + "External id": 296261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454214.955, "dur": 1.879, + "args": { + "External id": 296262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454218.360, "dur": 0.608, + "args": { + "External id": 296263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454220.468, "dur": 1.428, + "args": { + "External id": 296264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454223.116, "dur": 0.435, + "args": { + "External id": 296265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454224.891, "dur": 0.674, + "args": { + "External id": 296266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368454226.754, "dur": 0.487, + "args": { + "External id": 296267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368454246.229, "dur": 145.011, + "args": { + "External id": 296268,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368454264.097, "dur": 122.605, + "args": { + "External id": 296269,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368454277.466, "dur": 12.404, + "args": { + "External id": 296270,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368454294.099, "dur": 64.959, + "args": { + "External id": 296271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368454296.478, "dur": 62.305, + "args": { + "External id": 296272,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454300.032, "dur": 5.691, + "args": { + "External id": 296273,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368454307.277, "dur": 51.010, + "args": { + "External id": 296274,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2070547, "tid": 2107622, + "ts": 5333368454495.355, "dur": 539.055, + "args": { + "External id": 296275,"Record function id": 0, "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070547, "tid": 2107622, + "ts": 5333368454510.452, "dur": 511.489, + "args": { + "External id": 296276,"Record function id": 0, "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368454563.989, "dur": 4.395, + "args": { + "External id": 296277,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368454584.060, "dur": 21.205, + "args": { + "External id": 296278,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454588.173, "dur": 2.265, + "args": { + "External id": 296279,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454591.554, "dur": 0.341, + "args": { + "External id": 296280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454592.602, "dur": 0.185, + "args": { + "External id": 296281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454593.573, "dur": 0.223, + "args": { + "External id": 296282,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454594.683, "dur": 0.209, + "args": { + "External id": 296283,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454595.570, "dur": 0.606, + "args": { + "External id": 296284,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454597.203, "dur": 0.523, + "args": { + "External id": 296285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454598.725, "dur": 0.404, + "args": { + "External id": 296286,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454600.137, "dur": 1.747, + "args": { + "External id": 296287,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368454614.963, "dur": 70.010, + "args": { + "External id": 296288,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368454720.387, "dur": 92.374, + "args": { + "External id": 296289,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368454730.842, "dur": 4.329, + "args": { + "External id": 296290,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368454739.730, "dur": 9.933, + "args": { + "External id": 296291,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368454743.748, "dur": 5.508, + "args": { + "External id": 296292,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454747.070, "dur": 0.616, + "args": { + "External id": 296293,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368454756.048, "dur": 20.054, + "args": { + "External id": 296294,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454757.664, "dur": 0.342, + "args": { + "External id": 296295,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454759.159, "dur": 0.247, + "args": { + "External id": 296296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454763.722, "dur": 0.320, + "args": { + "External id": 296297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454764.875, "dur": 0.861, + "args": { + "External id": 296298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454766.777, "dur": 0.400, + "args": { + "External id": 296299,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454768.332, "dur": 1.233, + "args": { + "External id": 296300,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454770.799, "dur": 0.220, + "args": { + "External id": 296301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454771.851, "dur": 0.380, + "args": { + "External id": 296302,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368454772.858, "dur": 0.225, + "args": { + "External id": 296303,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368454786.012, "dur": 19.530, + "args": { + "External id": 296304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368454854.639, "dur": 107.262, + "args": { + "External id": 296305,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368454876.292, "dur": 82.619, + "args": { + "External id": 296306,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2929, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368454884.612, "dur": 69.974, + "args": { + "External id": 296307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368454974.593, "dur": 1.575, + "args": { + "External id": 296308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2931, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368455047.789, "dur": 1560.557, + "args": { + "External id": 296309,"Sequence number": 1209186, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2932 + } + }, + { + "ph": "f", "id": 46, "pid": 2070547, "tid": 2107622, "ts": 5333368455047.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455158.284, "dur": 121.561, + "args": { + "External id": 296310,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368455319.935, "dur": 38.935, + "args": { + "External id": 296311,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455376.004, "dur": 47.947, + "args": { + "External id": 296312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455433.693, "dur": 33.125, + "args": { + "External id": 296313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455472.957, "dur": 49.746, + "args": { + "External id": 296314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455529.112, "dur": 27.007, + "args": { + "External id": 296315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455563.877, "dur": 41.236, + "args": { + "External id": 296316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368455667.318, "dur": 29.526, + "args": { + "External id": 296317,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368455715.628, "dur": 32.585, + "args": { + "External id": 296318,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368455770.165, "dur": 20.843, + "args": { + "External id": 296319,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368455805.244, "dur": 15.335, + "args": { + "External id": 296320,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455827.401, "dur": 34.441, + "args": { + "External id": 296321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368455865.085, "dur": 33.432, + "args": { + "External id": 296322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368455926.330, "dur": 166.342, + "args": { + "External id": 296323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368456001.924, "dur": 5.907, + "args": { + "External id": 296324,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368456009.823, "dur": 2.397, + "args": { + "External id": 296325,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368456120.755, "dur": 21.832, + "args": { + "External id": 296326,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368456154.234, "dur": 29.800, + "args": { + "External id": 296327,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368456194.784, "dur": 38.657, + "args": { + "External id": 296328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368456240.380, "dur": 35.277, + "args": { + "External id": 296329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368456281.759, "dur": 20.304, + "args": { + "External id": 296330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368456306.526, "dur": 29.742, + "args": { + "External id": 296331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368456342.208, "dur": 20.864, + "args": { + "External id": 296332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368456370.012, "dur": 29.770, + "args": { + "External id": 296333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368456416.532, "dur": 20.830, + "args": { + "External id": 296334,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368456452.972, "dur": 20.833, + "args": { + "External id": 296335,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368456487.012, "dur": 15.199, + "args": { + "External id": 296336,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368456515.349, "dur": 26.675, + "args": { + "External id": 296337,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368456562.665, "dur": 18.593, + "args": { + "External id": 296338,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456692.248, "dur": 15.268, + "args": { + "External id": 296339,"Record function id": 0, "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456695.524, "dur": 11.023, + "args": { + "External id": 296340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456699.502, "dur": 5.754, + "args": { + "External id": 296341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456700.777, "dur": 4.385, + "args": { + "External id": 296342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456711.223, "dur": 4.013, + "args": { + "External id": 296343,"Record function id": 0, "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456712.555, "dur": 2.225, + "args": { + "External id": 296344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456713.277, "dur": 1.005, + "args": { + "External id": 296345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456713.577, "dur": 0.636, + "args": { + "External id": 296346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456718.535, "dur": 4.009, + "args": { + "External id": 296347,"Record function id": 0, "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456719.915, "dur": 2.175, + "args": { + "External id": 296348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456720.549, "dur": 1.140, + "args": { + "External id": 296349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456720.969, "dur": 0.640, + "args": { + "External id": 296350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456725.709, "dur": 3.633, + "args": { + "External id": 296351,"Record function id": 0, "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456726.782, "dur": 2.118, + "args": { + "External id": 296352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456727.431, "dur": 1.052, + "args": { + "External id": 296353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456727.898, "dur": 0.499, + "args": { + "External id": 296354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456732.407, "dur": 4.821, + "args": { + "External id": 296355,"Record function id": 0, "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456733.391, "dur": 3.380, + "args": { + "External id": 296356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456733.880, "dur": 2.467, + "args": { + "External id": 296357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456734.250, "dur": 2.023, + "args": { + "External id": 296358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456743.269, "dur": 3.658, + "args": { + "External id": 296359,"Record function id": 0, "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456744.551, "dur": 1.924, + "args": { + "External id": 296360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456745.040, "dur": 1.041, + "args": { + "External id": 296361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456745.478, "dur": 0.511, + "args": { + "External id": 296362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456750.091, "dur": 4.469, + "args": { + "External id": 296363,"Record function id": 0, "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456751.691, "dur": 2.435, + "args": { + "External id": 296364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456752.525, "dur": 1.182, + "args": { + "External id": 296365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456752.912, "dur": 0.728, + "args": { + "External id": 296366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456757.639, "dur": 3.270, + "args": { + "External id": 296367,"Record function id": 0, "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456758.546, "dur": 1.940, + "args": { + "External id": 296368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456759.040, "dur": 1.013, + "args": { + "External id": 296369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456759.335, "dur": 0.653, + "args": { + "External id": 296370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456764.578, "dur": 3.265, + "args": { + "External id": 296371,"Record function id": 0, "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368456765.565, "dur": 1.822, + "args": { + "External id": 296372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456766.045, "dur": 0.944, + "args": { + "External id": 296373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368456766.472, "dur": 0.453, + "args": { + "External id": 296374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368456771.822, "dur": 37266.495, + "args": { + "External id": 296375,"Record function id": 0, "Sequence number": 1209185, "Fwd thread id": 1, "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368456773.018, "dur": 37255.930, + "args": { + "External id": 296376,"Sequence number": 1209185, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2999 + } + }, + { + "ph": "f", "id": 47, "pid": 2070547, "tid": 2107622, "ts": 5333368456773.018, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2070547, "tid": 2107622, + "ts": 5333368456806.297, "dur": 37.338, + "args": { + "External id": 296377,"Record function id": 0, "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2070547, "tid": 2107622, + "ts": 5333368456851.399, "dur": 62.268, + "args": { + "External id": 296378,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2070547, "tid": 2107622, + "ts": 5333368456919.455, "dur": 37101.023, + "args": { + "External id": 296379,"Record function id": 0, "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368457005.444, "dur": 6.719, + "args": { + "External id": 296380,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368457021.566, "dur": 4.392, + "args": { + "External id": 296381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368457039.334, "dur": 36179.890, + "args": { + "External id": 296382,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368457052.133, "dur": 36156.995, + "args": { + "External id": 296383,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368457118.489, "dur": 17.340, + "args": { + "External id": 296384,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368457141.852, "dur": 36019.425, + "args": { + "External id": 296385,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368457144.438, "dur": 36015.959, + "args": { + "External id": 296386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368457148.147, "dur": 5.465, + "args": { + "External id": 296387,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368457155.334, "dur": 36001.180, + "args": { + "External id": 296388,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368493311.445, "dur": 10.383, + "args": { + "External id": 296389,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368493314.391, "dur": 7.074, + "args": { + "External id": 296390,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368493350.585, "dur": 390.344, + "args": { + "External id": 296391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368493375.593, "dur": 360.260, + "args": { + "External id": 296392,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3015, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368493386.047, "dur": 343.913, + "args": { + "External id": 296393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368493760.062, "dur": 2.046, + "args": { + "External id": 296394,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3017, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493824.131, "dur": 6.578, + "args": { + "External id": 296395,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493877.008, "dur": 1.220, + "args": { + "External id": 296396,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493893.896, "dur": 0.919, + "args": { + "External id": 296397,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493906.625, "dur": 0.871, + "args": { + "External id": 296398,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493917.235, "dur": 0.877, + "args": { + "External id": 296399,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493927.513, "dur": 0.677, + "args": { + "External id": 296400,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493938.203, "dur": 0.679, + "args": { + "External id": 296401,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493947.990, "dur": 1.773, + "args": { + "External id": 296402,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368493958.900, "dur": 0.678, + "args": { + "External id": 296403,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368494052.845, "dur": 2668.407, + "args": { + "External id": 296404,"Record function id": 0, "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2070547, "tid": 2107622, + "ts": 5333368494072.099, "dur": 996.526, + "args": { + "External id": 296405,"Record function id": 0, "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070547, "tid": 2107622, + "ts": 5333368494086.004, "dur": 314.378, + "args": { + "External id": 296406,"Record function id": 0, "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494163.881, "dur": 20.843, + "args": { + "External id": 296407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494190.209, "dur": 0.795, + "args": { + "External id": 296408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494192.703, "dur": 0.486, + "args": { + "External id": 296409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494194.845, "dur": 1.512, + "args": { + "External id": 296410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494198.208, "dur": 0.748, + "args": { + "External id": 296411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494200.747, "dur": 0.501, + "args": { + "External id": 296412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494203.006, "dur": 1.390, + "args": { + "External id": 296413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494205.881, "dur": 0.501, + "args": { + "External id": 296414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494208.018, "dur": 0.428, + "args": { + "External id": 296415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368494209.946, "dur": 0.427, + "args": { + "External id": 296416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368494228.635, "dur": 143.509, + "args": { + "External id": 296417,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368494243.732, "dur": 123.702, + "args": { + "External id": 296418,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368494256.358, "dur": 13.085, + "args": { + "External id": 296419,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368494273.507, "dur": 66.631, + "args": { + "External id": 296420,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368494276.260, "dur": 63.566, + "args": { + "External id": 296421,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494279.697, "dur": 7.116, + "args": { + "External id": 296422,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368494288.293, "dur": 51.061, + "args": { + "External id": 296423,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2070547, "tid": 2107622, + "ts": 5333368494474.625, "dur": 586.293, + "args": { + "External id": 296424,"Record function id": 0, "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070547, "tid": 2107622, + "ts": 5333368494490.656, "dur": 557.310, + "args": { + "External id": 296425,"Record function id": 0, "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368494543.946, "dur": 4.807, + "args": { + "External id": 296426,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368494564.585, "dur": 21.957, + "args": { + "External id": 296427,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494568.984, "dur": 1.276, + "args": { + "External id": 296428,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494571.702, "dur": 0.391, + "args": { + "External id": 296429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494572.859, "dur": 0.290, + "args": { + "External id": 296430,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494574.241, "dur": 0.623, + "args": { + "External id": 296431,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494575.580, "dur": 0.262, + "args": { + "External id": 296432,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494576.928, "dur": 0.336, + "args": { + "External id": 296433,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494578.497, "dur": 1.678, + "args": { + "External id": 296434,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494581.164, "dur": 0.411, + "args": { + "External id": 296435,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494582.786, "dur": 0.362, + "args": { + "External id": 296436,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368494595.952, "dur": 73.431, + "args": { + "External id": 296437,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368494705.390, "dur": 123.023, + "args": { + "External id": 296438,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368494715.901, "dur": 4.293, + "args": { + "External id": 296439,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368494725.066, "dur": 9.014, + "args": { + "External id": 296440,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368494728.852, "dur": 4.807, + "args": { + "External id": 296441,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494731.620, "dur": 0.534, + "args": { + "External id": 296442,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368494740.538, "dur": 47.745, + "args": { + "External id": 296443,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494742.296, "dur": 0.298, + "args": { + "External id": 296444,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494743.594, "dur": 0.279, + "args": { + "External id": 296445,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494745.411, "dur": 0.248, + "args": { + "External id": 296446,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494747.172, "dur": 1.307, + "args": { + "External id": 296447,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494749.756, "dur": 0.384, + "args": { + "External id": 296448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494780.424, "dur": 0.479, + "args": { + "External id": 296449,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494782.309, "dur": 0.391, + "args": { + "External id": 296450,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494783.589, "dur": 0.205, + "args": { + "External id": 296451,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368494785.289, "dur": 0.288, + "args": { + "External id": 296452,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368494798.481, "dur": 22.634, + "args": { + "External id": 296453,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368494874.778, "dur": 110.240, + "args": { + "External id": 296454,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368494897.080, "dur": 84.749, + "args": { + "External id": 296455,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3078, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368494906.282, "dur": 71.367, + "args": { + "External id": 296456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368494997.427, "dur": 1.702, + "args": { + "External id": 296457,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3080, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368495075.143, "dur": 1620.940, + "args": { + "External id": 296458,"Sequence number": 1209184, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3081 + } + }, + { + "ph": "f", "id": 48, "pid": 2070547, "tid": 2107622, "ts": 5333368495075.143, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495197.328, "dur": 106.575, + "args": { + "External id": 296459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368495341.913, "dur": 39.023, + "args": { + "External id": 296460,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495396.397, "dur": 49.110, + "args": { + "External id": 296461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495454.133, "dur": 32.199, + "args": { + "External id": 296462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495492.202, "dur": 45.236, + "args": { + "External id": 296463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495544.055, "dur": 27.501, + "args": { + "External id": 296464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495578.752, "dur": 81.821, + "args": { + "External id": 296465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368495688.679, "dur": 26.852, + "args": { + "External id": 296466,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368495733.661, "dur": 29.584, + "args": { + "External id": 296467,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368495781.863, "dur": 18.695, + "args": { + "External id": 296468,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368495813.604, "dur": 13.000, + "args": { + "External id": 296469,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495834.558, "dur": 31.972, + "args": { + "External id": 296470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368495870.076, "dur": 36.366, + "args": { + "External id": 296471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368495932.926, "dur": 179.722, + "args": { + "External id": 296472,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368496014.343, "dur": 6.106, + "args": { + "External id": 296473,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368496022.309, "dur": 3.013, + "args": { + "External id": 296474,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368496142.929, "dur": 42.874, + "args": { + "External id": 296475,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368496199.275, "dur": 17.449, + "args": { + "External id": 296476,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368496226.545, "dur": 42.109, + "args": { + "External id": 296477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368496274.802, "dur": 36.561, + "args": { + "External id": 296478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368496317.653, "dur": 20.782, + "args": { + "External id": 296479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368496342.601, "dur": 30.019, + "args": { + "External id": 296480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368496378.451, "dur": 19.789, + "args": { + "External id": 296481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368496404.310, "dur": 28.336, + "args": { + "External id": 296482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368496451.166, "dur": 21.595, + "args": { + "External id": 296483,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368496492.624, "dur": 38.639, + "args": { + "External id": 296484,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368496556.085, "dur": 16.948, + "args": { + "External id": 296485,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368496587.827, "dur": 13.501, + "args": { + "External id": 296486,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368496612.085, "dur": 53.181, + "args": { + "External id": 296487,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496744.525, "dur": 15.111, + "args": { + "External id": 296488,"Record function id": 0, "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496747.868, "dur": 10.772, + "args": { + "External id": 296489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496751.907, "dur": 5.841, + "args": { + "External id": 296490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496753.241, "dur": 4.405, + "args": { + "External id": 296491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496763.543, "dur": 4.305, + "args": { + "External id": 296492,"Record function id": 0, "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496764.974, "dur": 2.359, + "args": { + "External id": 296493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496765.551, "dur": 1.355, + "args": { + "External id": 296494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496765.899, "dur": 0.940, + "args": { + "External id": 296495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496771.114, "dur": 4.272, + "args": { + "External id": 296496,"Record function id": 0, "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496772.576, "dur": 2.390, + "args": { + "External id": 296497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496773.145, "dur": 1.382, + "args": { + "External id": 296498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496773.508, "dur": 0.935, + "args": { + "External id": 296499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496778.593, "dur": 3.900, + "args": { + "External id": 296500,"Record function id": 0, "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496779.825, "dur": 2.221, + "args": { + "External id": 296501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496780.364, "dur": 1.266, + "args": { + "External id": 296502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496780.871, "dur": 0.701, + "args": { + "External id": 296503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496785.612, "dur": 4.247, + "args": { + "External id": 296504,"Record function id": 0, "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496786.844, "dur": 2.564, + "args": { + "External id": 296505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496787.341, "dur": 1.625, + "args": { + "External id": 296506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496787.805, "dur": 1.089, + "args": { + "External id": 296507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496792.968, "dur": 4.823, + "args": { + "External id": 296508,"Record function id": 0, "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496794.632, "dur": 2.714, + "args": { + "External id": 296509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496795.101, "dur": 1.840, + "args": { + "External id": 296510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496795.594, "dur": 1.287, + "args": { + "External id": 296511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496800.967, "dur": 4.277, + "args": { + "External id": 296512,"Record function id": 0, "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496802.509, "dur": 2.322, + "args": { + "External id": 296513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496802.940, "dur": 1.461, + "args": { + "External id": 296514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496803.484, "dur": 0.857, + "args": { + "External id": 296515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496808.353, "dur": 4.362, + "args": { + "External id": 296516,"Record function id": 0, "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496809.729, "dur": 2.554, + "args": { + "External id": 296517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496810.192, "dur": 1.655, + "args": { + "External id": 296518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496811.026, "dur": 0.759, + "args": { + "External id": 296519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496815.849, "dur": 7.244, + "args": { + "External id": 296520,"Record function id": 0, "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368496817.134, "dur": 5.530, + "args": { + "External id": 296521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496817.591, "dur": 4.667, + "args": { + "External id": 296522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368496821.393, "dur": 0.801, + "args": { + "External id": 296523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368496827.029, "dur": 37191.478, + "args": { + "External id": 296524,"Record function id": 0, "Sequence number": 1209183, "Fwd thread id": 1, "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368496828.428, "dur": 37181.534, + "args": { + "External id": 296525,"Sequence number": 1209183, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3148 + } + }, + { + "ph": "f", "id": 49, "pid": 2070547, "tid": 2107622, "ts": 5333368496828.428, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2070547, "tid": 2107622, + "ts": 5333368496857.609, "dur": 40.495, + "args": { + "External id": 296526,"Record function id": 0, "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2070547, "tid": 2107622, + "ts": 5333368496905.413, "dur": 59.047, + "args": { + "External id": 296527,"Record function id": 0, "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2070547, "tid": 2107622, + "ts": 5333368496970.734, "dur": 37031.449, + "args": { + "External id": 296528,"Record function id": 0, "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368497059.612, "dur": 6.540, + "args": { + "External id": 296529,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368497075.432, "dur": 4.518, + "args": { + "External id": 296530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368497093.036, "dur": 36106.520, + "args": { + "External id": 296531,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368497105.729, "dur": 36084.693, + "args": { + "External id": 296532,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368497139.564, "dur": 13.671, + "args": { + "External id": 296533,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368497159.322, "dur": 35981.822, + "args": { + "External id": 296534,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368497161.704, "dur": 35978.740, + "args": { + "External id": 296535,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368497164.887, "dur": 22.931, + "args": { + "External id": 296536,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368497190.192, "dur": 35946.494, + "args": { + "External id": 296537,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368533286.930, "dur": 8.276, + "args": { + "External id": 296538,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368533289.406, "dur": 5.481, + "args": { + "External id": 296539,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368533321.778, "dur": 403.443, + "args": { + "External id": 296540,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368533347.798, "dur": 372.301, + "args": { + "External id": 296541,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3164, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368533358.942, "dur": 355.771, + "args": { + "External id": 296542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368533747.238, "dur": 2.168, + "args": { + "External id": 296543,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3166, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533809.281, "dur": 6.662, + "args": { + "External id": 296544,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533858.517, "dur": 1.646, + "args": { + "External id": 296545,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533874.374, "dur": 1.270, + "args": { + "External id": 296546,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533887.289, "dur": 1.435, + "args": { + "External id": 296547,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533899.920, "dur": 1.056, + "args": { + "External id": 296548,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533910.380, "dur": 1.040, + "args": { + "External id": 296549,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533922.630, "dur": 1.023, + "args": { + "External id": 296550,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533933.382, "dur": 1.762, + "args": { + "External id": 296551,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368533944.387, "dur": 0.829, + "args": { + "External id": 296552,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368534032.596, "dur": 2684.475, + "args": { + "External id": 296553,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2070547, "tid": 2107622, + "ts": 5333368534051.800, "dur": 968.126, + "args": { + "External id": 296554,"Record function id": 0, "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070547, "tid": 2107622, + "ts": 5333368534064.645, "dur": 313.753, + "args": { + "External id": 296555,"Record function id": 0, "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534140.258, "dur": 4.028, + "args": { + "External id": 296556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534147.386, "dur": 0.847, + "args": { + "External id": 296557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534150.274, "dur": 0.717, + "args": { + "External id": 296558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534152.766, "dur": 1.063, + "args": { + "External id": 296559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534155.197, "dur": 1.005, + "args": { + "External id": 296560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534158.293, "dur": 0.973, + "args": { + "External id": 296561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534161.058, "dur": 1.808, + "args": { + "External id": 296562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534164.423, "dur": 0.608, + "args": { + "External id": 296563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534183.320, "dur": 1.329, + "args": { + "External id": 296564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368534186.551, "dur": 0.740, + "args": { + "External id": 296565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368534207.226, "dur": 142.201, + "args": { + "External id": 296566,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368534223.142, "dur": 121.757, + "args": { + "External id": 296567,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368534234.801, "dur": 12.589, + "args": { + "External id": 296568,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368534251.333, "dur": 65.817, + "args": { + "External id": 296569,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368534253.982, "dur": 62.788, + "args": { + "External id": 296570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534257.963, "dur": 6.582, + "args": { + "External id": 296571,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368534266.100, "dur": 50.145, + "args": { + "External id": 296572,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2070547, "tid": 2107622, + "ts": 5333368534452.578, "dur": 559.318, + "args": { + "External id": 296573,"Record function id": 0, "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070547, "tid": 2107622, + "ts": 5333368534469.732, "dur": 529.273, + "args": { + "External id": 296574,"Record function id": 0, "Ev Idx": 3197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368534525.017, "dur": 5.239, + "args": { + "External id": 296575,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368534544.915, "dur": 25.534, + "args": { + "External id": 296576,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534549.522, "dur": 1.465, + "args": { + "External id": 296577,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534552.894, "dur": 1.025, + "args": { + "External id": 296578,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534555.015, "dur": 0.620, + "args": { + "External id": 296579,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534557.464, "dur": 0.415, + "args": { + "External id": 296580,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534559.272, "dur": 0.655, + "args": { + "External id": 296581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534561.463, "dur": 0.570, + "args": { + "External id": 296582,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534563.362, "dur": 0.848, + "args": { + "External id": 296583,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534565.081, "dur": 0.445, + "args": { + "External id": 296584,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534567.126, "dur": 0.561, + "args": { + "External id": 296585,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368534579.775, "dur": 29.635, + "args": { + "External id": 296586,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368534679.312, "dur": 100.692, + "args": { + "External id": 296587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368534689.882, "dur": 4.914, + "args": { + "External id": 296588,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368534699.914, "dur": 10.579, + "args": { + "External id": 296589,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368534703.800, "dur": 6.308, + "args": { + "External id": 296590,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534707.335, "dur": 1.185, + "args": { + "External id": 296591,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368534718.830, "dur": 21.036, + "args": { + "External id": 296592,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534720.784, "dur": 0.415, + "args": { + "External id": 296593,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534722.588, "dur": 0.603, + "args": { + "External id": 296594,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534724.273, "dur": 0.799, + "args": { + "External id": 296595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534726.117, "dur": 0.709, + "args": { + "External id": 296596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534727.838, "dur": 0.546, + "args": { + "External id": 296597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534729.640, "dur": 0.781, + "args": { + "External id": 296598,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534731.653, "dur": 0.657, + "args": { + "External id": 296599,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534733.481, "dur": 0.413, + "args": { + "External id": 296600,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368534735.181, "dur": 0.632, + "args": { + "External id": 296601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368534750.081, "dur": 22.410, + "args": { + "External id": 296602,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368534823.316, "dur": 112.345, + "args": { + "External id": 296603,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368534848.503, "dur": 83.929, + "args": { + "External id": 296604,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3227, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368534857.540, "dur": 70.542, + "args": { + "External id": 296605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368534950.380, "dur": 2.020, + "args": { + "External id": 296606,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3229, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368535026.089, "dur": 1669.390, + "args": { + "External id": 296607,"Sequence number": 1209182, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3230 + } + }, + { + "ph": "f", "id": 50, "pid": 2070547, "tid": 2107622, "ts": 5333368535026.089, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535130.221, "dur": 123.443, + "args": { + "External id": 296608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368535318.190, "dur": 42.714, + "args": { + "External id": 296609,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535379.139, "dur": 52.728, + "args": { + "External id": 296610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535443.462, "dur": 32.564, + "args": { + "External id": 296611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535482.210, "dur": 45.097, + "args": { + "External id": 296612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535534.473, "dur": 27.037, + "args": { + "External id": 296613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535568.602, "dur": 41.109, + "args": { + "External id": 296614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368535685.724, "dur": 26.332, + "args": { + "External id": 296615,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368535734.237, "dur": 29.426, + "args": { + "External id": 296616,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368535784.751, "dur": 19.803, + "args": { + "External id": 296617,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368535817.036, "dur": 14.218, + "args": { + "External id": 296618,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535839.967, "dur": 32.639, + "args": { + "External id": 296619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368535875.921, "dur": 33.303, + "args": { + "External id": 296620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368535937.147, "dur": 172.372, + "args": { + "External id": 296621,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368536015.267, "dur": 6.362, + "args": { + "External id": 296622,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368536023.441, "dur": 3.598, + "args": { + "External id": 296623,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368536142.000, "dur": 23.014, + "args": { + "External id": 296624,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368536197.603, "dur": 16.424, + "args": { + "External id": 296625,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368536223.353, "dur": 38.228, + "args": { + "External id": 296626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368536267.771, "dur": 34.767, + "args": { + "External id": 296627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368536309.442, "dur": 20.226, + "args": { + "External id": 296628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368536334.634, "dur": 28.997, + "args": { + "External id": 296629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368536369.308, "dur": 20.588, + "args": { + "External id": 296630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368536396.703, "dur": 29.389, + "args": { + "External id": 296631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368536443.103, "dur": 25.090, + "args": { + "External id": 296632,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368536484.613, "dur": 44.043, + "args": { + "External id": 296633,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368536548.842, "dur": 16.292, + "args": { + "External id": 296634,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368536580.970, "dur": 15.509, + "args": { + "External id": 296635,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368536609.389, "dur": 50.530, + "args": { + "External id": 296636,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536739.136, "dur": 17.781, + "args": { + "External id": 296637,"Record function id": 0, "Ev Idx": 3260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536742.478, "dur": 13.426, + "args": { + "External id": 296638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536746.609, "dur": 5.379, + "args": { + "External id": 296639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536747.880, "dur": 4.022, + "args": { + "External id": 296640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536760.877, "dur": 4.515, + "args": { + "External id": 296641,"Record function id": 0, "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536762.287, "dur": 2.687, + "args": { + "External id": 296642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536763.033, "dur": 1.505, + "args": { + "External id": 296643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536763.384, "dur": 1.062, + "args": { + "External id": 296644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536768.797, "dur": 4.191, + "args": { + "External id": 296645,"Record function id": 0, "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536769.907, "dur": 2.674, + "args": { + "External id": 296646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536770.499, "dur": 1.681, + "args": { + "External id": 296647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536770.930, "dur": 1.165, + "args": { + "External id": 296648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536776.189, "dur": 3.831, + "args": { + "External id": 296649,"Record function id": 0, "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536777.611, "dur": 2.002, + "args": { + "External id": 296650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536778.091, "dur": 1.095, + "args": { + "External id": 296651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536778.444, "dur": 0.669, + "args": { + "External id": 296652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536783.224, "dur": 3.982, + "args": { + "External id": 296653,"Record function id": 0, "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536784.425, "dur": 2.381, + "args": { + "External id": 296654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536784.875, "dur": 1.501, + "args": { + "External id": 296655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536785.553, "dur": 0.749, + "args": { + "External id": 296656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536790.289, "dur": 3.676, + "args": { + "External id": 296657,"Record function id": 0, "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536791.257, "dur": 2.283, + "args": { + "External id": 296658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536791.829, "dur": 1.292, + "args": { + "External id": 296659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536792.227, "dur": 0.821, + "args": { + "External id": 296660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536797.112, "dur": 4.560, + "args": { + "External id": 296661,"Record function id": 0, "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536798.097, "dur": 3.154, + "args": { + "External id": 296662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536798.738, "dur": 2.110, + "args": { + "External id": 296663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536799.848, "dur": 0.925, + "args": { + "External id": 296664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536804.715, "dur": 4.111, + "args": { + "External id": 296665,"Record function id": 0, "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536805.871, "dur": 2.568, + "args": { + "External id": 296666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536806.321, "dur": 1.702, + "args": { + "External id": 296667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536807.035, "dur": 0.910, + "args": { + "External id": 296668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536812.182, "dur": 4.223, + "args": { + "External id": 296669,"Record function id": 0, "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368536813.366, "dur": 2.623, + "args": { + "External id": 296670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536813.917, "dur": 1.678, + "args": { + "External id": 296671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368536814.658, "dur": 0.834, + "args": { + "External id": 296672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368536820.264, "dur": 36731.829, + "args": { + "External id": 296673,"Record function id": 0, "Sequence number": 1209181, "Fwd thread id": 1, "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368536821.479, "dur": 36722.274, + "args": { + "External id": 296674,"Sequence number": 1209181, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3297 + } + }, + { + "ph": "f", "id": 51, "pid": 2070547, "tid": 2107622, "ts": 5333368536821.479, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2070547, "tid": 2107622, + "ts": 5333368536852.695, "dur": 33.994, + "args": { + "External id": 296675,"Record function id": 0, "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2070547, "tid": 2107622, + "ts": 5333368536894.092, "dur": 64.413, + "args": { + "External id": 296676,"Record function id": 0, "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2070547, "tid": 2107622, + "ts": 5333368536964.601, "dur": 36572.454, + "args": { + "External id": 296677,"Record function id": 0, "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368537048.621, "dur": 7.143, + "args": { + "External id": 296678,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368537065.418, "dur": 4.681, + "args": { + "External id": 296679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368537083.800, "dur": 35623.790, + "args": { + "External id": 296680,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368537097.812, "dur": 35600.822, + "args": { + "External id": 296681,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368537144.236, "dur": 17.556, + "args": { + "External id": 296682,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368537185.037, "dur": 35476.909, + "args": { + "External id": 296683,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368537189.036, "dur": 35472.234, + "args": { + "External id": 296684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368537192.661, "dur": 6.517, + "args": { + "External id": 296685,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368537201.304, "dur": 35456.387, + "args": { + "External id": 296686,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368572793.423, "dur": 9.879, + "args": { + "External id": 296687,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368572796.046, "dur": 6.876, + "args": { + "External id": 296688,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368572830.984, "dur": 430.364, + "args": { + "External id": 296689,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368572855.590, "dur": 400.352, + "args": { + "External id": 296690,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3313, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368572866.365, "dur": 383.042, + "args": { + "External id": 296691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368573282.897, "dur": 1.841, + "args": { + "External id": 296692,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3315, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573345.464, "dur": 6.756, + "args": { + "External id": 296693,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573396.219, "dur": 1.350, + "args": { + "External id": 296694,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573412.133, "dur": 1.278, + "args": { + "External id": 296695,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573424.196, "dur": 1.085, + "args": { + "External id": 296696,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573436.672, "dur": 0.888, + "args": { + "External id": 296697,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573447.343, "dur": 0.861, + "args": { + "External id": 296698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573458.278, "dur": 1.022, + "args": { + "External id": 296699,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573469.064, "dur": 1.398, + "args": { + "External id": 296700,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573479.567, "dur": 0.797, + "args": { + "External id": 296701,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368573566.268, "dur": 2617.280, + "args": { + "External id": 296702,"Record function id": 0, "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2070547, "tid": 2107622, + "ts": 5333368573585.395, "dur": 949.079, + "args": { + "External id": 296703,"Record function id": 0, "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070547, "tid": 2107622, + "ts": 5333368573599.052, "dur": 332.302, + "args": { + "External id": 296704,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573711.510, "dur": 4.701, + "args": { + "External id": 296705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573719.515, "dur": 0.954, + "args": { + "External id": 296706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573722.447, "dur": 0.831, + "args": { + "External id": 296707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573725.267, "dur": 1.088, + "args": { + "External id": 296708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573728.283, "dur": 1.335, + "args": { + "External id": 296709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573731.141, "dur": 1.320, + "args": { + "External id": 296710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573734.034, "dur": 0.752, + "args": { + "External id": 296711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573736.391, "dur": 1.282, + "args": { + "External id": 296712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573739.234, "dur": 1.275, + "args": { + "External id": 296713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368573741.899, "dur": 1.055, + "args": { + "External id": 296714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368573761.371, "dur": 142.157, + "args": { + "External id": 296715,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368573777.678, "dur": 121.339, + "args": { + "External id": 296716,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368573790.469, "dur": 12.671, + "args": { + "External id": 296717,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368573807.102, "dur": 65.494, + "args": { + "External id": 296718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368573809.488, "dur": 62.769, + "args": { + "External id": 296719,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368573812.996, "dur": 6.446, + "args": { + "External id": 296720,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368573821.149, "dur": 50.508, + "args": { + "External id": 296721,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2070547, "tid": 2107622, + "ts": 5333368574002.869, "dur": 523.913, + "args": { + "External id": 296722,"Record function id": 0, "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070547, "tid": 2107622, + "ts": 5333368574019.256, "dur": 494.920, + "args": { + "External id": 296723,"Record function id": 0, "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368574071.849, "dur": 4.846, + "args": { + "External id": 296724,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368574090.916, "dur": 29.295, + "args": { + "External id": 296725,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574095.160, "dur": 1.917, + "args": { + "External id": 296726,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574098.659, "dur": 0.810, + "args": { + "External id": 296727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574100.641, "dur": 0.389, + "args": { + "External id": 296728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574103.348, "dur": 0.689, + "args": { + "External id": 296729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574105.678, "dur": 0.736, + "args": { + "External id": 296730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574107.683, "dur": 0.603, + "args": { + "External id": 296731,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574110.994, "dur": 0.656, + "args": { + "External id": 296732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574113.089, "dur": 0.490, + "args": { + "External id": 296733,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574114.851, "dur": 1.834, + "args": { + "External id": 296734,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368574130.391, "dur": 28.021, + "args": { + "External id": 296735,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368574205.867, "dur": 97.742, + "args": { + "External id": 296736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368574216.440, "dur": 4.916, + "args": { + "External id": 296737,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368574226.321, "dur": 9.559, + "args": { + "External id": 296738,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368574229.976, "dur": 5.524, + "args": { + "External id": 296739,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574233.154, "dur": 0.801, + "args": { + "External id": 296740,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368574242.243, "dur": 23.272, + "args": { + "External id": 296741,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574244.425, "dur": 0.606, + "args": { + "External id": 296742,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574246.741, "dur": 0.488, + "args": { + "External id": 296743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574248.831, "dur": 1.203, + "args": { + "External id": 296744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574251.000, "dur": 0.587, + "args": { + "External id": 296745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574253.007, "dur": 0.593, + "args": { + "External id": 296746,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574255.351, "dur": 0.766, + "args": { + "External id": 296747,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574257.111, "dur": 0.395, + "args": { + "External id": 296748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574258.806, "dur": 0.455, + "args": { + "External id": 296749,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368574260.555, "dur": 0.649, + "args": { + "External id": 296750,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368574275.970, "dur": 20.270, + "args": { + "External id": 296751,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368574345.556, "dur": 109.415, + "args": { + "External id": 296752,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368574368.275, "dur": 83.542, + "args": { + "External id": 296753,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3376, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368574377.311, "dur": 68.914, + "args": { + "External id": 296754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368574468.069, "dur": 1.839, + "args": { + "External id": 296755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3378, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368574541.980, "dur": 1603.377, + "args": { + "External id": 296756,"Sequence number": 1209180, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3379 + } + }, + { + "ph": "f", "id": 52, "pid": 2070547, "tid": 2107622, "ts": 5333368574541.980, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368574685.026, "dur": 105.546, + "args": { + "External id": 296757,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368574831.300, "dur": 37.437, + "args": { + "External id": 296758,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368574885.479, "dur": 48.696, + "args": { + "External id": 296759,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368574944.032, "dur": 32.655, + "args": { + "External id": 296760,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368574983.240, "dur": 45.031, + "args": { + "External id": 296761,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575034.708, "dur": 26.991, + "args": { + "External id": 296762,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575069.080, "dur": 41.218, + "args": { + "External id": 296763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368575132.803, "dur": 24.506, + "args": { + "External id": 296764,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368575195.010, "dur": 32.608, + "args": { + "External id": 296765,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368575249.973, "dur": 19.941, + "args": { + "External id": 296766,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368575283.893, "dur": 15.706, + "args": { + "External id": 296767,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575307.469, "dur": 31.274, + "args": { + "External id": 296768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575341.927, "dur": 32.232, + "args": { + "External id": 296769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368575402.065, "dur": 167.897, + "args": { + "External id": 296770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368575478.885, "dur": 5.988, + "args": { + "External id": 296771,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368575486.883, "dur": 2.984, + "args": { + "External id": 296772,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368575598.385, "dur": 60.296, + "args": { + "External id": 296773,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368575673.270, "dur": 18.136, + "args": { + "External id": 296774,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575701.659, "dur": 39.591, + "args": { + "External id": 296775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575747.439, "dur": 38.534, + "args": { + "External id": 296776,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575792.187, "dur": 24.259, + "args": { + "External id": 296777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575822.327, "dur": 30.164, + "args": { + "External id": 296778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575858.300, "dur": 23.413, + "args": { + "External id": 296779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368575888.485, "dur": 28.879, + "args": { + "External id": 296780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368575938.896, "dur": 22.127, + "args": { + "External id": 296781,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368575978.130, "dur": 23.042, + "args": { + "External id": 296782,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368576014.915, "dur": 16.088, + "args": { + "External id": 296783,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368576047.547, "dur": 32.648, + "args": { + "External id": 296784,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368576099.295, "dur": 16.796, + "args": { + "External id": 296785,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576206.525, "dur": 15.101, + "args": { + "External id": 296786,"Record function id": 0, "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576209.744, "dur": 10.907, + "args": { + "External id": 296787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576213.818, "dur": 5.648, + "args": { + "External id": 296788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576215.077, "dur": 4.306, + "args": { + "External id": 296789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576225.577, "dur": 3.981, + "args": { + "External id": 296790,"Record function id": 0, "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576226.777, "dur": 2.262, + "args": { + "External id": 296791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576227.264, "dur": 1.317, + "args": { + "External id": 296792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576227.561, "dur": 0.936, + "args": { + "External id": 296793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576232.983, "dur": 7.147, + "args": { + "External id": 296794,"Record function id": 0, "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576234.317, "dur": 5.397, + "args": { + "External id": 296795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576234.825, "dur": 1.587, + "args": { + "External id": 296796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576235.203, "dur": 1.141, + "args": { + "External id": 296797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576243.356, "dur": 3.796, + "args": { + "External id": 296798,"Record function id": 0, "Ev Idx": 3421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576244.502, "dur": 2.220, + "args": { + "External id": 296799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576244.951, "dur": 1.365, + "args": { + "External id": 296800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576245.238, "dur": 1.012, + "args": { + "External id": 296801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576250.274, "dur": 3.451, + "args": { + "External id": 296802,"Record function id": 0, "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576251.237, "dur": 2.087, + "args": { + "External id": 296803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576251.708, "dur": 1.224, + "args": { + "External id": 296804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576252.124, "dur": 0.743, + "args": { + "External id": 296805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576256.768, "dur": 4.037, + "args": { + "External id": 296806,"Record function id": 0, "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576258.022, "dur": 2.388, + "args": { + "External id": 296807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576258.639, "dur": 1.325, + "args": { + "External id": 296808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576259.074, "dur": 0.822, + "args": { + "External id": 296809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576263.979, "dur": 3.765, + "args": { + "External id": 296810,"Record function id": 0, "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576264.970, "dur": 2.357, + "args": { + "External id": 296811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576265.409, "dur": 1.519, + "args": { + "External id": 296812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576266.117, "dur": 0.739, + "args": { + "External id": 296813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576270.836, "dur": 3.771, + "args": { + "External id": 296814,"Record function id": 0, "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576271.795, "dur": 2.400, + "args": { + "External id": 296815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576272.246, "dur": 1.535, + "args": { + "External id": 296816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576272.939, "dur": 0.774, + "args": { + "External id": 296817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576277.877, "dur": 3.623, + "args": { + "External id": 296818,"Record function id": 0, "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368576278.934, "dur": 2.162, + "args": { + "External id": 296819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576279.359, "dur": 1.340, + "args": { + "External id": 296820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368576279.897, "dur": 0.739, + "args": { + "External id": 296821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368576285.787, "dur": 36784.969, + "args": { + "External id": 296822,"Record function id": 0, "Sequence number": 1209179, "Fwd thread id": 1, "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368576286.989, "dur": 36774.861, + "args": { + "External id": 296823,"Sequence number": 1209179, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3446 + } + }, + { + "ph": "f", "id": 53, "pid": 2070547, "tid": 2107622, "ts": 5333368576286.989, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2070547, "tid": 2107622, + "ts": 5333368576319.557, "dur": 39.476, + "args": { + "External id": 296824,"Record function id": 0, "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2070547, "tid": 2107622, + "ts": 5333368576367.401, "dur": 58.630, + "args": { + "External id": 296825,"Record function id": 0, "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2070547, "tid": 2107622, + "ts": 5333368576431.720, "dur": 36621.952, + "args": { + "External id": 296826,"Record function id": 0, "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368576520.952, "dur": 7.392, + "args": { + "External id": 296827,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368576537.155, "dur": 4.461, + "args": { + "External id": 296828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368576555.562, "dur": 35757.865, + "args": { + "External id": 296829,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368576568.096, "dur": 35737.069, + "args": { + "External id": 296830,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368576601.398, "dur": 14.855, + "args": { + "External id": 296831,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368576658.986, "dur": 35609.815, + "args": { + "External id": 296832,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368576662.781, "dur": 35605.495, + "args": { + "External id": 296833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368576666.413, "dur": 6.162, + "args": { + "External id": 296834,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368576674.245, "dur": 35590.553, + "args": { + "External id": 296835,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368612400.176, "dur": 9.018, + "args": { + "External id": 296836,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368612402.935, "dur": 5.801, + "args": { + "External id": 296837,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368612437.653, "dur": 338.486, + "args": { + "External id": 296838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368612462.838, "dur": 308.314, + "args": { + "External id": 296839,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3462, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368612474.230, "dur": 290.701, + "args": { + "External id": 296840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368612797.677, "dur": 2.209, + "args": { + "External id": 296841,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3464, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612858.300, "dur": 6.722, + "args": { + "External id": 296842,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612906.963, "dur": 1.288, + "args": { + "External id": 296843,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612921.984, "dur": 1.473, + "args": { + "External id": 296844,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612934.321, "dur": 0.803, + "args": { + "External id": 296845,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612945.976, "dur": 1.405, + "args": { + "External id": 296846,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612957.243, "dur": 1.226, + "args": { + "External id": 296847,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612969.401, "dur": 1.325, + "args": { + "External id": 296848,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612980.854, "dur": 1.254, + "args": { + "External id": 296849,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368612990.678, "dur": 1.096, + "args": { + "External id": 296850,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368613085.951, "dur": 2677.175, + "args": { + "External id": 296851,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2070547, "tid": 2107622, + "ts": 5333368613105.093, "dur": 983.837, + "args": { + "External id": 296852,"Record function id": 0, "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070547, "tid": 2107622, + "ts": 5333368613120.086, "dur": 321.351, + "args": { + "External id": 296853,"Record function id": 0, "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613212.775, "dur": 5.108, + "args": { + "External id": 296854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613221.348, "dur": 1.034, + "args": { + "External id": 296855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613224.329, "dur": 1.119, + "args": { + "External id": 296856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613227.276, "dur": 1.047, + "args": { + "External id": 296857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613230.507, "dur": 1.215, + "args": { + "External id": 296858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613233.115, "dur": 1.346, + "args": { + "External id": 296859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613236.319, "dur": 1.202, + "args": { + "External id": 296860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613239.388, "dur": 1.183, + "args": { + "External id": 296861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613242.634, "dur": 1.258, + "args": { + "External id": 296862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368613245.410, "dur": 1.345, + "args": { + "External id": 296863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368613265.920, "dur": 146.572, + "args": { + "External id": 296864,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368613282.684, "dur": 125.360, + "args": { + "External id": 296865,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368613296.404, "dur": 13.683, + "args": { + "External id": 296866,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368613314.318, "dur": 66.182, + "args": { + "External id": 296867,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368613316.811, "dur": 63.348, + "args": { + "External id": 296868,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613320.383, "dur": 6.414, + "args": { + "External id": 296869,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368613328.354, "dur": 51.304, + "args": { + "External id": 296870,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2070547, "tid": 2107622, + "ts": 5333368613516.004, "dur": 565.368, + "args": { + "External id": 296871,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070547, "tid": 2107622, + "ts": 5333368613531.614, "dur": 536.516, + "args": { + "External id": 296872,"Record function id": 0, "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368613585.867, "dur": 5.226, + "args": { + "External id": 296873,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368613606.349, "dur": 68.686, + "args": { + "External id": 296874,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613610.667, "dur": 2.019, + "args": { + "External id": 296875,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613614.489, "dur": 0.586, + "args": { + "External id": 296876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613616.423, "dur": 0.704, + "args": { + "External id": 296877,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613655.343, "dur": 1.064, + "args": { + "External id": 296878,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613659.386, "dur": 0.729, + "args": { + "External id": 296879,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613661.301, "dur": 0.748, + "args": { + "External id": 296880,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613665.205, "dur": 1.021, + "args": { + "External id": 296881,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613667.539, "dur": 0.669, + "args": { + "External id": 296882,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613669.675, "dur": 1.043, + "args": { + "External id": 296883,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368613686.335, "dur": 32.293, + "args": { + "External id": 296884,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368613752.240, "dur": 97.102, + "args": { + "External id": 296885,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368613762.400, "dur": 5.226, + "args": { + "External id": 296886,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368613772.804, "dur": 9.406, + "args": { + "External id": 296887,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368613776.693, "dur": 5.099, + "args": { + "External id": 296888,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613779.841, "dur": 0.690, + "args": { + "External id": 296889,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368613789.426, "dur": 23.868, + "args": { + "External id": 296890,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613791.268, "dur": 0.792, + "args": { + "External id": 296891,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613793.427, "dur": 1.283, + "args": { + "External id": 296892,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613796.065, "dur": 0.567, + "args": { + "External id": 296893,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613797.885, "dur": 0.796, + "args": { + "External id": 296894,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613800.297, "dur": 0.701, + "args": { + "External id": 296895,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613802.399, "dur": 0.626, + "args": { + "External id": 296896,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613804.111, "dur": 0.806, + "args": { + "External id": 296897,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613807.339, "dur": 0.604, + "args": { + "External id": 296898,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368613809.229, "dur": 0.769, + "args": { + "External id": 296899,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368613823.609, "dur": 18.146, + "args": { + "External id": 296900,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368613893.648, "dur": 110.813, + "args": { + "External id": 296901,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368613918.161, "dur": 83.291, + "args": { + "External id": 296902,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3525, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368613927.463, "dur": 69.764, + "args": { + "External id": 296903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368614018.525, "dur": 2.058, + "args": { + "External id": 296904,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3527, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368614095.776, "dur": 1646.611, + "args": { + "External id": 296905,"Sequence number": 1209178, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3528 + } + }, + { + "ph": "f", "id": 54, "pid": 2070547, "tid": 2107622, "ts": 5333368614095.776, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614220.099, "dur": 106.119, + "args": { + "External id": 296906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368614366.442, "dur": 38.260, + "args": { + "External id": 296907,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614420.986, "dur": 48.217, + "args": { + "External id": 296908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614479.226, "dur": 31.945, + "args": { + "External id": 296909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614517.735, "dur": 43.764, + "args": { + "External id": 296910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614567.887, "dur": 27.011, + "args": { + "External id": 296911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614602.439, "dur": 81.292, + "args": { + "External id": 296912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368614710.938, "dur": 24.635, + "args": { + "External id": 296913,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368614753.651, "dur": 30.373, + "args": { + "External id": 296914,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368614804.881, "dur": 19.160, + "args": { + "External id": 296915,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368614836.257, "dur": 16.856, + "args": { + "External id": 296916,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614861.196, "dur": 31.730, + "args": { + "External id": 296917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368614896.300, "dur": 32.729, + "args": { + "External id": 296918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368614958.755, "dur": 170.039, + "args": { + "External id": 296919,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368615036.386, "dur": 6.450, + "args": { + "External id": 296920,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368615044.861, "dur": 2.881, + "args": { + "External id": 296921,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368615162.009, "dur": 47.550, + "args": { + "External id": 296922,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368615223.420, "dur": 16.489, + "args": { + "External id": 296923,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368615249.208, "dur": 41.805, + "args": { + "External id": 296924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368615297.014, "dur": 37.781, + "args": { + "External id": 296925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368615342.009, "dur": 22.235, + "args": { + "External id": 296926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368615369.120, "dur": 30.856, + "args": { + "External id": 296927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368615405.572, "dur": 20.615, + "args": { + "External id": 296928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368615433.529, "dur": 32.707, + "args": { + "External id": 296929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368615483.411, "dur": 24.688, + "args": { + "External id": 296930,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368615524.229, "dur": 23.475, + "args": { + "External id": 296931,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368615571.599, "dur": 27.561, + "args": { + "External id": 296932,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368615651.642, "dur": 20.383, + "args": { + "External id": 296933,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368615690.902, "dur": 18.518, + "args": { + "External id": 296934,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615788.852, "dur": 14.835, + "args": { + "External id": 296935,"Record function id": 0, "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615792.176, "dur": 10.572, + "args": { + "External id": 296936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615796.346, "dur": 5.561, + "args": { + "External id": 296937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615797.676, "dur": 4.152, + "args": { + "External id": 296938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615807.510, "dur": 4.151, + "args": { + "External id": 296939,"Record function id": 0, "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615808.891, "dur": 2.328, + "args": { + "External id": 296940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615809.513, "dur": 1.232, + "args": { + "External id": 296941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615809.837, "dur": 0.842, + "args": { + "External id": 296942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615815.147, "dur": 4.438, + "args": { + "External id": 296943,"Record function id": 0, "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615816.461, "dur": 2.679, + "args": { + "External id": 296944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615817.145, "dur": 1.540, + "args": { + "External id": 296945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615817.505, "dur": 1.094, + "args": { + "External id": 296946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615822.870, "dur": 3.987, + "args": { + "External id": 296947,"Record function id": 0, "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615824.257, "dur": 2.207, + "args": { + "External id": 296948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615824.739, "dur": 1.260, + "args": { + "External id": 296949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615825.072, "dur": 0.845, + "args": { + "External id": 296950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615830.011, "dur": 3.649, + "args": { + "External id": 296951,"Record function id": 0, "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615830.961, "dur": 2.245, + "args": { + "External id": 296952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615831.467, "dur": 1.273, + "args": { + "External id": 296953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615831.857, "dur": 0.784, + "args": { + "External id": 296954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615836.881, "dur": 4.347, + "args": { + "External id": 296955,"Record function id": 0, "Ev Idx": 3578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615838.355, "dur": 2.484, + "args": { + "External id": 296956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615838.834, "dur": 1.591, + "args": { + "External id": 296957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615839.113, "dur": 1.226, + "args": { + "External id": 296958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615844.375, "dur": 15.874, + "args": { + "External id": 296959,"Record function id": 0, "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615845.636, "dur": 14.154, + "args": { + "External id": 296960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615857.511, "dur": 1.808, + "args": { + "External id": 296961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615857.966, "dur": 1.287, + "args": { + "External id": 296962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615863.663, "dur": 3.620, + "args": { + "External id": 296963,"Record function id": 0, "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615864.709, "dur": 2.155, + "args": { + "External id": 296964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615865.162, "dur": 1.277, + "args": { + "External id": 296965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615865.567, "dur": 0.809, + "args": { + "External id": 296966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615870.635, "dur": 3.928, + "args": { + "External id": 296967,"Record function id": 0, "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368615871.845, "dur": 2.282, + "args": { + "External id": 296968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615872.409, "dur": 1.294, + "args": { + "External id": 296969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368615872.685, "dur": 0.951, + "args": { + "External id": 296970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368615878.925, "dur": 37560.613, + "args": { + "External id": 296971,"Record function id": 0, "Sequence number": 1209177, "Fwd thread id": 1, "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368615880.301, "dur": 37550.928, + "args": { + "External id": 296972,"Sequence number": 1209177, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3595 + } + }, + { + "ph": "f", "id": 55, "pid": 2070547, "tid": 2107622, "ts": 5333368615880.301, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2070547, "tid": 2107622, + "ts": 5333368615909.463, "dur": 37.941, + "args": { + "External id": 296973,"Record function id": 0, "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2070547, "tid": 2107622, + "ts": 5333368615955.627, "dur": 59.428, + "args": { + "External id": 296974,"Record function id": 0, "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2070547, "tid": 2107622, + "ts": 5333368616020.491, "dur": 37403.071, + "args": { + "External id": 296975,"Record function id": 0, "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368616109.944, "dur": 7.353, + "args": { + "External id": 296976,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368616126.301, "dur": 5.047, + "args": { + "External id": 296977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368616144.479, "dur": 36439.699, + "args": { + "External id": 296978,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368616157.974, "dur": 36417.782, + "args": { + "External id": 296979,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368616219.830, "dur": 15.143, + "args": { + "External id": 296980,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368616241.329, "dur": 36295.528, + "args": { + "External id": 296981,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368616243.655, "dur": 36292.414, + "args": { + "External id": 296982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368616247.123, "dur": 6.813, + "args": { + "External id": 296983,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368616255.606, "dur": 36277.115, + "args": { + "External id": 296984,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368652697.112, "dur": 9.325, + "args": { + "External id": 296985,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368652699.678, "dur": 6.175, + "args": { + "External id": 296986,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368652735.898, "dur": 393.721, + "args": { + "External id": 296987,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368652759.929, "dur": 364.821, + "args": { + "External id": 296988,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3611, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368652771.449, "dur": 348.323, + "args": { + "External id": 296989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368653147.581, "dur": 2.133, + "args": { + "External id": 296990,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3613, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653227.149, "dur": 6.840, + "args": { + "External id": 296991,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653274.990, "dur": 1.541, + "args": { + "External id": 296992,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653290.585, "dur": 1.354, + "args": { + "External id": 296993,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653302.699, "dur": 0.966, + "args": { + "External id": 296994,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653317.582, "dur": 1.001, + "args": { + "External id": 296995,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653328.849, "dur": 1.085, + "args": { + "External id": 296996,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653339.681, "dur": 1.159, + "args": { + "External id": 296997,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653351.941, "dur": 0.977, + "args": { + "External id": 296998,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653362.575, "dur": 1.076, + "args": { + "External id": 296999,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368653455.159, "dur": 2609.920, + "args": { + "External id": 297000,"Record function id": 0, "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2070547, "tid": 2107622, + "ts": 5333368653474.179, "dur": 959.373, + "args": { + "External id": 297001,"Record function id": 0, "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070547, "tid": 2107622, + "ts": 5333368653487.723, "dur": 325.135, + "args": { + "External id": 297002,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653559.619, "dur": 3.851, + "args": { + "External id": 297003,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653566.567, "dur": 1.239, + "args": { + "External id": 297004,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653569.384, "dur": 0.826, + "args": { + "External id": 297005,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653571.724, "dur": 0.899, + "args": { + "External id": 297006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653574.829, "dur": 1.055, + "args": { + "External id": 297007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653577.375, "dur": 0.983, + "args": { + "External id": 297008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653579.824, "dur": 0.823, + "args": { + "External id": 297009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653582.073, "dur": 1.079, + "args": { + "External id": 297010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653584.524, "dur": 0.786, + "args": { + "External id": 297011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368653586.488, "dur": 1.064, + "args": { + "External id": 297012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368653604.278, "dur": 176.922, + "args": { + "External id": 297013,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368653652.947, "dur": 122.944, + "args": { + "External id": 297014,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368653667.066, "dur": 12.502, + "args": { + "External id": 297015,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368653683.468, "dur": 62.945, + "args": { + "External id": 297016,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368653685.713, "dur": 60.431, + "args": { + "External id": 297017,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653689.360, "dur": 5.752, + "args": { + "External id": 297018,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368653696.577, "dur": 49.027, + "args": { + "External id": 297019,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2070547, "tid": 2107622, + "ts": 5333368653889.699, "dur": 536.256, + "args": { + "External id": 297020,"Record function id": 0, "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070547, "tid": 2107622, + "ts": 5333368653906.367, "dur": 507.111, + "args": { + "External id": 297021,"Record function id": 0, "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368653960.867, "dur": 5.307, + "args": { + "External id": 297022,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368653981.303, "dur": 26.962, + "args": { + "External id": 297023,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653986.017, "dur": 1.787, + "args": { + "External id": 297024,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653989.442, "dur": 0.726, + "args": { + "External id": 297025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653991.205, "dur": 0.754, + "args": { + "External id": 297026,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653993.665, "dur": 0.764, + "args": { + "External id": 297027,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653995.632, "dur": 0.413, + "args": { + "External id": 297028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653997.256, "dur": 0.594, + "args": { + "External id": 297029,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368653999.159, "dur": 0.839, + "args": { + "External id": 297030,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654001.856, "dur": 0.830, + "args": { + "External id": 297031,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654004.058, "dur": 0.632, + "args": { + "External id": 297032,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368654018.397, "dur": 30.942, + "args": { + "External id": 297033,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368654081.283, "dur": 112.860, + "args": { + "External id": 297034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368654090.796, "dur": 3.461, + "args": { + "External id": 297035,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368654099.029, "dur": 9.446, + "args": { + "External id": 297036,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368654102.808, "dur": 5.264, + "args": { + "External id": 297037,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654105.951, "dur": 0.828, + "args": { + "External id": 297038,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368654115.032, "dur": 23.025, + "args": { + "External id": 297039,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654116.958, "dur": 0.478, + "args": { + "External id": 297040,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654118.624, "dur": 0.697, + "args": { + "External id": 297041,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654120.844, "dur": 0.749, + "args": { + "External id": 297042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654123.547, "dur": 0.597, + "args": { + "External id": 297043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654125.384, "dur": 0.600, + "args": { + "External id": 297044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654127.139, "dur": 1.291, + "args": { + "External id": 297045,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654129.680, "dur": 0.592, + "args": { + "External id": 297046,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654131.417, "dur": 0.836, + "args": { + "External id": 297047,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368654133.736, "dur": 1.047, + "args": { + "External id": 297048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368654147.334, "dur": 36.578, + "args": { + "External id": 297049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368654238.309, "dur": 109.840, + "args": { + "External id": 297050,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368654259.597, "dur": 85.445, + "args": { + "External id": 297051,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368654269.051, "dur": 71.735, + "args": { + "External id": 297052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368654362.382, "dur": 1.942, + "args": { + "External id": 297053,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368654440.191, "dur": 1603.105, + "args": { + "External id": 297054,"Sequence number": 1209176, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3677 + } + }, + { + "ph": "f", "id": 56, "pid": 2070547, "tid": 2107622, "ts": 5333368654440.191, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368654542.241, "dur": 144.559, + "args": { + "External id": 297055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368654730.810, "dur": 38.394, + "args": { + "External id": 297056,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368654785.762, "dur": 53.915, + "args": { + "External id": 297057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368654849.695, "dur": 32.606, + "args": { + "External id": 297058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368654888.325, "dur": 44.837, + "args": { + "External id": 297059,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368654939.508, "dur": 27.267, + "args": { + "External id": 297060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368654973.921, "dur": 41.774, + "args": { + "External id": 297061,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368655036.533, "dur": 22.766, + "args": { + "External id": 297062,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368655076.315, "dur": 29.711, + "args": { + "External id": 297063,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368655125.400, "dur": 19.512, + "args": { + "External id": 297064,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368655157.982, "dur": 31.523, + "args": { + "External id": 297065,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655200.931, "dur": 33.089, + "args": { + "External id": 297066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655237.234, "dur": 32.880, + "args": { + "External id": 297067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368655295.815, "dur": 176.349, + "args": { + "External id": 297068,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368655376.606, "dur": 6.708, + "args": { + "External id": 297069,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368655385.135, "dur": 3.180, + "args": { + "External id": 297070,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368655502.472, "dur": 23.579, + "args": { + "External id": 297071,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368655537.054, "dur": 14.934, + "args": { + "External id": 297072,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655558.778, "dur": 32.289, + "args": { + "External id": 297073,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655596.678, "dur": 75.414, + "args": { + "External id": 297074,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655682.667, "dur": 25.693, + "args": { + "External id": 297075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655713.511, "dur": 30.500, + "args": { + "External id": 297076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655752.052, "dur": 21.964, + "args": { + "External id": 297077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368655780.637, "dur": 30.707, + "args": { + "External id": 297078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368655829.234, "dur": 21.870, + "args": { + "External id": 297079,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368655867.769, "dur": 22.998, + "args": { + "External id": 297080,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368655904.817, "dur": 16.996, + "args": { + "External id": 297081,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368655953.880, "dur": 24.280, + "args": { + "External id": 297082,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368655995.829, "dur": 17.984, + "args": { + "External id": 297083,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656089.708, "dur": 15.273, + "args": { + "External id": 297084,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656093.006, "dur": 10.974, + "args": { + "External id": 297085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656097.205, "dur": 5.902, + "args": { + "External id": 297086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656098.396, "dur": 4.626, + "args": { + "External id": 297087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656108.523, "dur": 5.241, + "args": { + "External id": 297088,"Record function id": 0, "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656110.159, "dur": 3.170, + "args": { + "External id": 297089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656111.446, "dur": 1.381, + "args": { + "External id": 297090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656111.781, "dur": 0.976, + "args": { + "External id": 297091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656117.094, "dur": 3.992, + "args": { + "External id": 297092,"Record function id": 0, "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656118.548, "dur": 2.137, + "args": { + "External id": 297093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656119.050, "dur": 1.209, + "args": { + "External id": 297094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656119.366, "dur": 0.826, + "args": { + "External id": 297095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656124.367, "dur": 3.887, + "args": { + "External id": 297096,"Record function id": 0, "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656125.415, "dur": 2.387, + "args": { + "External id": 297097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656125.996, "dur": 1.375, + "args": { + "External id": 297098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656126.350, "dur": 0.932, + "args": { + "External id": 297099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656131.553, "dur": 7.258, + "args": { + "External id": 297100,"Record function id": 0, "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656136.112, "dur": 2.280, + "args": { + "External id": 297101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656136.591, "dur": 1.395, + "args": { + "External id": 297102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656136.922, "dur": 0.976, + "args": { + "External id": 297103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656141.982, "dur": 4.737, + "args": { + "External id": 297104,"Record function id": 0, "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656143.572, "dur": 2.712, + "args": { + "External id": 297105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656144.573, "dur": 1.267, + "args": { + "External id": 297106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656144.998, "dur": 0.750, + "args": { + "External id": 297107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656149.931, "dur": 4.059, + "args": { + "External id": 297108,"Record function id": 0, "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656151.378, "dur": 2.189, + "args": { + "External id": 297109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656151.857, "dur": 1.298, + "args": { + "External id": 297110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656152.128, "dur": 0.920, + "args": { + "External id": 297111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656157.338, "dur": 3.544, + "args": { + "External id": 297112,"Record function id": 0, "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656158.546, "dur": 1.933, + "args": { + "External id": 297113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656158.993, "dur": 1.099, + "args": { + "External id": 297114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656159.251, "dur": 0.742, + "args": { + "External id": 297115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656164.564, "dur": 21.565, + "args": { + "External id": 297116,"Record function id": 0, "Ev Idx": 3739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368656180.759, "dur": 4.461, + "args": { + "External id": 297117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656181.912, "dur": 2.458, + "args": { + "External id": 297118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368656182.466, "dur": 1.639, + "args": { + "External id": 297119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368656195.129, "dur": 37805.630, + "args": { + "External id": 297120,"Record function id": 0, "Sequence number": 1209175, "Fwd thread id": 1, "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368656196.794, "dur": 37795.805, + "args": { + "External id": 297121,"Sequence number": 1209175, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3744 + } + }, + { + "ph": "f", "id": 57, "pid": 2070547, "tid": 2107622, "ts": 5333368656196.794, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2070547, "tid": 2107622, + "ts": 5333368656225.050, "dur": 37.268, + "args": { + "External id": 297122,"Record function id": 0, "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2070547, "tid": 2107622, + "ts": 5333368656268.983, "dur": 61.252, + "args": { + "External id": 297123,"Record function id": 0, "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2070547, "tid": 2107622, + "ts": 5333368656336.536, "dur": 37649.035, + "args": { + "External id": 297124,"Record function id": 0, "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368656420.561, "dur": 7.041, + "args": { + "External id": 297125,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368656436.621, "dur": 5.135, + "args": { + "External id": 297126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368656455.672, "dur": 36742.963, + "args": { + "External id": 297127,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368656468.851, "dur": 36720.042, + "args": { + "External id": 297128,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368656520.450, "dur": 14.033, + "args": { + "External id": 297129,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368656540.610, "dur": 36599.236, + "args": { + "External id": 297130,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368656543.090, "dur": 36595.942, + "args": { + "External id": 297131,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368656546.835, "dur": 4.922, + "args": { + "External id": 297132,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368656553.263, "dur": 36582.154, + "args": { + "External id": 297133,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368693290.251, "dur": 9.917, + "args": { + "External id": 297134,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368693293.058, "dur": 6.738, + "args": { + "External id": 297135,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368693328.697, "dur": 387.754, + "args": { + "External id": 297136,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368693353.809, "dur": 357.489, + "args": { + "External id": 297137,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3760, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368693364.884, "dur": 340.528, + "args": { + "External id": 297138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368693737.196, "dur": 2.089, + "args": { + "External id": 297139,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3762, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693797.816, "dur": 6.834, + "args": { + "External id": 297140,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693846.669, "dur": 1.334, + "args": { + "External id": 297141,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693862.186, "dur": 1.351, + "args": { + "External id": 297142,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693874.160, "dur": 0.934, + "args": { + "External id": 297143,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693886.347, "dur": 0.830, + "args": { + "External id": 297144,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693896.819, "dur": 0.950, + "args": { + "External id": 297145,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693907.461, "dur": 1.057, + "args": { + "External id": 297146,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693918.653, "dur": 0.966, + "args": { + "External id": 297147,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368693928.603, "dur": 1.014, + "args": { + "External id": 297148,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368694014.436, "dur": 2551.759, + "args": { + "External id": 297149,"Record function id": 0, "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2070547, "tid": 2107622, + "ts": 5333368694031.317, "dur": 946.151, + "args": { + "External id": 297150,"Record function id": 0, "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070547, "tid": 2107622, + "ts": 5333368694044.240, "dur": 301.257, + "args": { + "External id": 297151,"Record function id": 0, "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694113.689, "dur": 4.039, + "args": { + "External id": 297152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694121.173, "dur": 1.052, + "args": { + "External id": 297153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694123.947, "dur": 0.800, + "args": { + "External id": 297154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694126.217, "dur": 1.166, + "args": { + "External id": 297155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694128.719, "dur": 1.566, + "args": { + "External id": 297156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694131.740, "dur": 0.803, + "args": { + "External id": 297157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694134.094, "dur": 0.746, + "args": { + "External id": 297158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694136.098, "dur": 0.701, + "args": { + "External id": 297159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694138.149, "dur": 1.337, + "args": { + "External id": 297160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368694140.959, "dur": 0.898, + "args": { + "External id": 297161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368694158.898, "dur": 157.891, + "args": { + "External id": 297162,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368694190.557, "dur": 121.822, + "args": { + "External id": 297163,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368694202.968, "dur": 12.449, + "args": { + "External id": 297164,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368694219.269, "dur": 64.470, + "args": { + "External id": 297165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368694221.576, "dur": 61.874, + "args": { + "External id": 297166,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694225.341, "dur": 6.109, + "args": { + "External id": 297167,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368694232.988, "dur": 49.971, + "args": { + "External id": 297168,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2070547, "tid": 2107622, + "ts": 5333368694419.564, "dur": 550.251, + "args": { + "External id": 297169,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070547, "tid": 2107622, + "ts": 5333368694434.378, "dur": 522.424, + "args": { + "External id": 297170,"Record function id": 0, "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368694486.710, "dur": 4.858, + "args": { + "External id": 297171,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368694505.920, "dur": 29.366, + "args": { + "External id": 297172,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694510.919, "dur": 1.708, + "args": { + "External id": 297173,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694514.633, "dur": 0.755, + "args": { + "External id": 297174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694516.908, "dur": 0.684, + "args": { + "External id": 297175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694519.244, "dur": 0.788, + "args": { + "External id": 297176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694521.282, "dur": 0.793, + "args": { + "External id": 297177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694523.524, "dur": 0.941, + "args": { + "External id": 297178,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694526.338, "dur": 0.622, + "args": { + "External id": 297179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694528.266, "dur": 0.913, + "args": { + "External id": 297180,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694530.723, "dur": 0.745, + "args": { + "External id": 297181,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368694544.509, "dur": 29.825, + "args": { + "External id": 297182,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368694603.815, "dur": 142.435, + "args": { + "External id": 297183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368694613.580, "dur": 3.213, + "args": { + "External id": 297184,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368694658.708, "dur": 11.361, + "args": { + "External id": 297185,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368694662.736, "dur": 6.932, + "args": { + "External id": 297186,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694666.596, "dur": 1.100, + "args": { + "External id": 297187,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368694678.166, "dur": 25.621, + "args": { + "External id": 297188,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694680.369, "dur": 0.819, + "args": { + "External id": 297189,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694683.808, "dur": 0.672, + "args": { + "External id": 297190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694687.496, "dur": 1.106, + "args": { + "External id": 297191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694689.740, "dur": 0.713, + "args": { + "External id": 297192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694691.818, "dur": 0.895, + "args": { + "External id": 297193,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694694.116, "dur": 1.016, + "args": { + "External id": 297194,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694696.680, "dur": 0.733, + "args": { + "External id": 297195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694698.393, "dur": 0.781, + "args": { + "External id": 297196,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368694700.533, "dur": 0.771, + "args": { + "External id": 297197,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368694714.281, "dur": 23.718, + "args": { + "External id": 297198,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368694787.554, "dur": 108.222, + "args": { + "External id": 297199,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368694808.617, "dur": 84.125, + "args": { + "External id": 297200,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3823, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368694817.800, "dur": 71.111, + "args": { + "External id": 297201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368694909.373, "dur": 1.743, + "args": { + "External id": 297202,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3825, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368694984.132, "dur": 1560.231, + "args": { + "External id": 297203,"Sequence number": 1209174, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3826 + } + }, + { + "ph": "f", "id": 58, "pid": 2070547, "tid": 2107622, "ts": 5333368694984.132, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695085.874, "dur": 119.986, + "args": { + "External id": 297204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368695247.441, "dur": 38.587, + "args": { + "External id": 297205,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695302.110, "dur": 52.852, + "args": { + "External id": 297206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695364.804, "dur": 33.194, + "args": { + "External id": 297207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695404.296, "dur": 51.998, + "args": { + "External id": 297208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695462.898, "dur": 27.310, + "args": { + "External id": 297209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695497.453, "dur": 42.584, + "args": { + "External id": 297210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368695561.953, "dur": 23.406, + "args": { + "External id": 297211,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368695602.460, "dur": 64.870, + "args": { + "External id": 297212,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368695691.636, "dur": 20.831, + "args": { + "External id": 297213,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368695725.556, "dur": 15.536, + "args": { + "External id": 297214,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695749.758, "dur": 33.884, + "args": { + "External id": 297215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368695786.828, "dur": 34.638, + "args": { + "External id": 297216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368695847.497, "dur": 171.980, + "args": { + "External id": 297217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368695926.851, "dur": 6.335, + "args": { + "External id": 297218,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368695935.053, "dur": 3.617, + "args": { + "External id": 297219,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368696047.777, "dur": 24.198, + "args": { + "External id": 297220,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368696083.395, "dur": 14.874, + "args": { + "External id": 297221,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368696105.345, "dur": 33.209, + "args": { + "External id": 297222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368696144.383, "dur": 50.327, + "args": { + "External id": 297223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368696204.523, "dur": 24.262, + "args": { + "External id": 297224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368696233.579, "dur": 28.894, + "args": { + "External id": 297225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368696268.629, "dur": 20.189, + "args": { + "External id": 297226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368696295.550, "dur": 29.110, + "args": { + "External id": 297227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368696341.868, "dur": 22.184, + "args": { + "External id": 297228,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368696381.983, "dur": 22.046, + "args": { + "External id": 297229,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368696417.872, "dur": 16.040, + "args": { + "External id": 297230,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368696448.455, "dur": 14.058, + "args": { + "External id": 297231,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368696486.989, "dur": 26.104, + "args": { + "External id": 297232,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696588.432, "dur": 14.480, + "args": { + "External id": 297233,"Record function id": 0, "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696591.166, "dur": 10.740, + "args": { + "External id": 297234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696595.232, "dur": 5.804, + "args": { + "External id": 297235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696596.469, "dur": 4.434, + "args": { + "External id": 297236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696606.805, "dur": 7.782, + "args": { + "External id": 297237,"Record function id": 0, "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696611.237, "dur": 2.857, + "args": { + "External id": 297238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696612.229, "dur": 1.407, + "args": { + "External id": 297239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696612.697, "dur": 0.838, + "args": { + "External id": 297240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696664.296, "dur": 11.347, + "args": { + "External id": 297241,"Record function id": 0, "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696670.408, "dur": 4.496, + "args": { + "External id": 297242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696671.537, "dur": 2.486, + "args": { + "External id": 297243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696672.099, "dur": 1.681, + "args": { + "External id": 297244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696680.707, "dur": 6.563, + "args": { + "External id": 297245,"Record function id": 0, "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696682.140, "dur": 4.706, + "args": { + "External id": 297246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696682.876, "dur": 3.467, + "args": { + "External id": 297247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696685.283, "dur": 0.968, + "args": { + "External id": 297248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696690.508, "dur": 3.912, + "args": { + "External id": 297249,"Record function id": 0, "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696691.459, "dur": 2.541, + "args": { + "External id": 297250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696691.946, "dur": 1.581, + "args": { + "External id": 297251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696692.379, "dur": 1.075, + "args": { + "External id": 297252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696697.695, "dur": 4.270, + "args": { + "External id": 297253,"Record function id": 0, "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696698.926, "dur": 2.575, + "args": { + "External id": 297254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696699.719, "dur": 1.357, + "args": { + "External id": 297255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696700.063, "dur": 0.909, + "args": { + "External id": 297256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696705.171, "dur": 4.630, + "args": { + "External id": 297257,"Record function id": 0, "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696706.545, "dur": 2.805, + "args": { + "External id": 297258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696707.706, "dur": 1.220, + "args": { + "External id": 297259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696708.092, "dur": 0.769, + "args": { + "External id": 297260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696713.019, "dur": 4.261, + "args": { + "External id": 297261,"Record function id": 0, "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696714.106, "dur": 2.767, + "args": { + "External id": 297262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696714.849, "dur": 1.603, + "args": { + "External id": 297263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696715.377, "dur": 0.981, + "args": { + "External id": 297264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696720.418, "dur": 5.052, + "args": { + "External id": 297265,"Record function id": 0, "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368696721.802, "dur": 3.237, + "args": { + "External id": 297266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696722.505, "dur": 2.090, + "args": { + "External id": 297267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368696723.325, "dur": 1.195, + "args": { + "External id": 297268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368696729.342, "dur": 37551.276, + "args": { + "External id": 297269,"Record function id": 0, "Sequence number": 1209173, "Fwd thread id": 1, "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368696730.945, "dur": 37541.016, + "args": { + "External id": 297270,"Sequence number": 1209173, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3893 + } + }, + { + "ph": "f", "id": 59, "pid": 2070547, "tid": 2107622, "ts": 5333368696730.945, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2070547, "tid": 2107622, + "ts": 5333368696760.551, "dur": 35.779, + "args": { + "External id": 297271,"Record function id": 0, "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2070547, "tid": 2107622, + "ts": 5333368696803.714, "dur": 59.831, + "args": { + "External id": 297272,"Record function id": 0, "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2070547, "tid": 2107622, + "ts": 5333368696869.055, "dur": 37395.333, + "args": { + "External id": 297273,"Record function id": 0, "Ev Idx": 3896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368696957.122, "dur": 6.870, + "args": { + "External id": 297274,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368696973.231, "dur": 4.802, + "args": { + "External id": 297275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368696991.647, "dur": 36456.390, + "args": { + "External id": 297276,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368697004.312, "dur": 36436.058, + "args": { + "External id": 297277,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368697038.540, "dur": 13.985, + "args": { + "External id": 297278,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368697058.692, "dur": 36342.240, + "args": { + "External id": 297279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368697061.016, "dur": 36339.152, + "args": { + "External id": 297280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368697064.511, "dur": 5.806, + "args": { + "External id": 297281,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368697071.992, "dur": 36324.776, + "args": { + "External id": 297282,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368733532.849, "dur": 8.715, + "args": { + "External id": 297283,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368733535.476, "dur": 5.761, + "args": { + "External id": 297284,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368733570.612, "dur": 397.641, + "args": { + "External id": 297285,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368733595.669, "dur": 367.472, + "args": { + "External id": 297286,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3909, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368733605.466, "dur": 352.365, + "args": { + "External id": 297287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368733988.505, "dur": 2.478, + "args": { + "External id": 297288,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3911, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734050.097, "dur": 6.678, + "args": { + "External id": 297289,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734100.557, "dur": 1.366, + "args": { + "External id": 297290,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734116.339, "dur": 1.320, + "args": { + "External id": 297291,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734128.174, "dur": 1.187, + "args": { + "External id": 297292,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734140.032, "dur": 1.096, + "args": { + "External id": 297293,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734150.144, "dur": 0.984, + "args": { + "External id": 297294,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734160.929, "dur": 0.947, + "args": { + "External id": 297295,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734191.316, "dur": 1.432, + "args": { + "External id": 297296,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734203.954, "dur": 1.125, + "args": { + "External id": 297297,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368734296.331, "dur": 2639.522, + "args": { + "External id": 297298,"Record function id": 0, "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2070547, "tid": 2107622, + "ts": 5333368734315.316, "dur": 976.446, + "args": { + "External id": 297299,"Record function id": 0, "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070547, "tid": 2107622, + "ts": 5333368734328.581, "dur": 326.847, + "args": { + "External id": 297300,"Record function id": 0, "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734404.090, "dur": 4.615, + "args": { + "External id": 297301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734411.699, "dur": 1.041, + "args": { + "External id": 297302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734414.919, "dur": 0.836, + "args": { + "External id": 297303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734417.841, "dur": 1.355, + "args": { + "External id": 297304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734420.763, "dur": 1.030, + "args": { + "External id": 297305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734423.726, "dur": 0.825, + "args": { + "External id": 297306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734426.026, "dur": 0.920, + "args": { + "External id": 297307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734428.610, "dur": 0.920, + "args": { + "External id": 297308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734430.833, "dur": 0.759, + "args": { + "External id": 297309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368734432.854, "dur": 1.071, + "args": { + "External id": 297310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368734450.961, "dur": 142.326, + "args": { + "External id": 297311,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368734466.305, "dur": 122.799, + "args": { + "External id": 297312,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368734477.983, "dur": 13.502, + "args": { + "External id": 297313,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368734495.180, "dur": 65.408, + "args": { + "External id": 297314,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368734498.114, "dur": 62.121, + "args": { + "External id": 297315,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734502.105, "dur": 5.336, + "args": { + "External id": 297316,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368734509.031, "dur": 50.743, + "args": { + "External id": 297317,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2070547, "tid": 2107622, + "ts": 5333368734740.479, "dur": 543.499, + "args": { + "External id": 297318,"Record function id": 0, "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070547, "tid": 2107622, + "ts": 5333368734756.841, "dur": 515.008, + "args": { + "External id": 297319,"Record function id": 0, "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368734813.521, "dur": 5.985, + "args": { + "External id": 297320,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368734834.843, "dur": 27.606, + "args": { + "External id": 297321,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734839.849, "dur": 1.784, + "args": { + "External id": 297322,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734843.751, "dur": 0.712, + "args": { + "External id": 297323,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734845.828, "dur": 0.524, + "args": { + "External id": 297324,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734847.703, "dur": 0.893, + "args": { + "External id": 297325,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734849.934, "dur": 0.724, + "args": { + "External id": 297326,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734852.047, "dur": 0.957, + "args": { + "External id": 297327,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734853.937, "dur": 0.645, + "args": { + "External id": 297328,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734856.124, "dur": 0.842, + "args": { + "External id": 297329,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734858.590, "dur": 0.880, + "args": { + "External id": 297330,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368734871.601, "dur": 33.890, + "args": { + "External id": 297331,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368734933.719, "dur": 97.340, + "args": { + "External id": 297332,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368734943.130, "dur": 2.706, + "args": { + "External id": 297333,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368734950.488, "dur": 9.708, + "args": { + "External id": 297334,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368734954.405, "dur": 5.379, + "args": { + "External id": 297335,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734957.893, "dur": 0.714, + "args": { + "External id": 297336,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368734966.296, "dur": 27.600, + "args": { + "External id": 297337,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734968.118, "dur": 0.785, + "args": { + "External id": 297338,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734970.455, "dur": 1.076, + "args": { + "External id": 297339,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734973.307, "dur": 1.009, + "args": { + "External id": 297340,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734975.851, "dur": 0.592, + "args": { + "External id": 297341,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734977.910, "dur": 0.330, + "args": { + "External id": 297342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734979.742, "dur": 1.249, + "args": { + "External id": 297343,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734982.392, "dur": 0.904, + "args": { + "External id": 297344,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734984.586, "dur": 1.212, + "args": { + "External id": 297345,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368734987.244, "dur": 1.171, + "args": { + "External id": 297346,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368735004.054, "dur": 20.039, + "args": { + "External id": 297347,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368735072.469, "dur": 131.093, + "args": { + "External id": 297348,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368735096.636, "dur": 103.076, + "args": { + "External id": 297349,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3972, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368735105.396, "dur": 88.464, + "args": { + "External id": 297350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368735219.805, "dur": 2.113, + "args": { + "External id": 297351,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3974, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368735298.558, "dur": 1613.252, + "args": { + "External id": 297352,"Sequence number": 1209172, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3975 + } + }, + { + "ph": "f", "id": 60, "pid": 2070547, "tid": 2107622, "ts": 5333368735298.558, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368735404.950, "dur": 104.856, + "args": { + "External id": 297353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368735548.032, "dur": 37.355, + "args": { + "External id": 297354,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368735600.971, "dur": 92.767, + "args": { + "External id": 297355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368735707.648, "dur": 35.470, + "args": { + "External id": 297356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368735749.283, "dur": 46.859, + "args": { + "External id": 297357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368735803.369, "dur": 27.502, + "args": { + "External id": 297358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368735838.254, "dur": 42.164, + "args": { + "External id": 297359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368735903.050, "dur": 24.314, + "args": { + "External id": 297360,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368735946.911, "dur": 30.283, + "args": { + "External id": 297361,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368735997.360, "dur": 19.440, + "args": { + "External id": 297362,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368736028.848, "dur": 15.581, + "args": { + "External id": 297363,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736056.076, "dur": 28.800, + "args": { + "External id": 297364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736088.426, "dur": 32.988, + "args": { + "External id": 297365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368736147.632, "dur": 188.332, + "args": { + "External id": 297366,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368736240.593, "dur": 7.493, + "args": { + "External id": 297367,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368736250.374, "dur": 3.347, + "args": { + "External id": 297368,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368736366.112, "dur": 23.928, + "args": { + "External id": 297369,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368736402.259, "dur": 15.267, + "args": { + "External id": 297370,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736425.199, "dur": 38.225, + "args": { + "External id": 297371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736472.078, "dur": 35.070, + "args": { + "External id": 297372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736513.695, "dur": 25.171, + "args": { + "External id": 297373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736543.653, "dur": 28.682, + "args": { + "External id": 297374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736578.120, "dur": 20.835, + "args": { + "External id": 297375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368736604.916, "dur": 66.780, + "args": { + "External id": 297376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368736693.643, "dur": 22.365, + "args": { + "External id": 297377,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368736749.770, "dur": 31.859, + "args": { + "External id": 297378,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368736801.224, "dur": 17.493, + "args": { + "External id": 297379,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368736835.087, "dur": 13.720, + "args": { + "External id": 297380,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368736865.669, "dur": 18.031, + "args": { + "External id": 297381,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736957.565, "dur": 15.133, + "args": { + "External id": 297382,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736960.770, "dur": 10.922, + "args": { + "External id": 297383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736964.964, "dur": 5.704, + "args": { + "External id": 297384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736966.438, "dur": 4.149, + "args": { + "External id": 297385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736976.519, "dur": 7.572, + "args": { + "External id": 297386,"Record function id": 0, "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736977.728, "dur": 5.914, + "args": { + "External id": 297387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736981.582, "dur": 1.533, + "args": { + "External id": 297388,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736982.059, "dur": 0.992, + "args": { + "External id": 297389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736987.317, "dur": 3.744, + "args": { + "External id": 297390,"Record function id": 0, "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736988.361, "dur": 2.278, + "args": { + "External id": 297391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736988.841, "dur": 1.383, + "args": { + "External id": 297392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736989.337, "dur": 0.793, + "args": { + "External id": 297393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736994.127, "dur": 3.963, + "args": { + "External id": 297394,"Record function id": 0, "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368736995.484, "dur": 2.186, + "args": { + "External id": 297395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736995.929, "dur": 1.305, + "args": { + "External id": 297396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368736996.243, "dur": 0.909, + "args": { + "External id": 297397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737001.221, "dur": 4.384, + "args": { + "External id": 297398,"Record function id": 0, "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737002.756, "dur": 2.422, + "args": { + "External id": 297399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737003.339, "dur": 1.425, + "args": { + "External id": 297400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737003.819, "dur": 0.870, + "args": { + "External id": 297401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737008.882, "dur": 3.478, + "args": { + "External id": 297402,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737009.836, "dur": 2.113, + "args": { + "External id": 297403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737010.357, "dur": 1.146, + "args": { + "External id": 297404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737010.667, "dur": 0.736, + "args": { + "External id": 297405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737015.556, "dur": 4.082, + "args": { + "External id": 297406,"Record function id": 0, "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737016.772, "dur": 2.477, + "args": { + "External id": 297407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737017.217, "dur": 1.592, + "args": { + "External id": 297408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737017.786, "dur": 0.923, + "args": { + "External id": 297409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737022.698, "dur": 3.779, + "args": { + "External id": 297410,"Record function id": 0, "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737023.746, "dur": 2.326, + "args": { + "External id": 297411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737024.307, "dur": 1.341, + "args": { + "External id": 297412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737024.783, "dur": 0.800, + "args": { + "External id": 297413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737029.662, "dur": 4.234, + "args": { + "External id": 297414,"Record function id": 0, "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368737031.147, "dur": 2.353, + "args": { + "External id": 297415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737031.689, "dur": 1.380, + "args": { + "External id": 297416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368737032.076, "dur": 0.920, + "args": { + "External id": 297417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368737037.514, "dur": 38285.412, + "args": { + "External id": 297418,"Record function id": 0, "Sequence number": 1209171, "Fwd thread id": 1, "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368737038.890, "dur": 38275.440, + "args": { + "External id": 297419,"Sequence number": 1209171, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4042 + } + }, + { + "ph": "f", "id": 61, "pid": 2070547, "tid": 2107622, "ts": 5333368737038.890, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2070547, "tid": 2107622, + "ts": 5333368737069.877, "dur": 35.678, + "args": { + "External id": 297420,"Record function id": 0, "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2070547, "tid": 2107622, + "ts": 5333368737113.075, "dur": 81.622, + "args": { + "External id": 297421,"Record function id": 0, "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2070547, "tid": 2107622, + "ts": 5333368737202.250, "dur": 38103.859, + "args": { + "External id": 297422,"Record function id": 0, "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368737289.858, "dur": 7.069, + "args": { + "External id": 297423,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368737306.584, "dur": 5.125, + "args": { + "External id": 297424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368737325.623, "dur": 37071.345, + "args": { + "External id": 297425,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368737339.634, "dur": 37048.504, + "args": { + "External id": 297426,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368737388.389, "dur": 17.652, + "args": { + "External id": 297427,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368737412.289, "dur": 36938.394, + "args": { + "External id": 297428,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368737414.915, "dur": 36934.981, + "args": { + "External id": 297429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368737418.479, "dur": 5.362, + "args": { + "External id": 297430,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368737425.435, "dur": 36921.016, + "args": { + "External id": 297431,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368774482.425, "dur": 9.134, + "args": { + "External id": 297432,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368774485.210, "dur": 6.005, + "args": { + "External id": 297433,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368774520.195, "dur": 484.375, + "args": { + "External id": 297434,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368774545.301, "dur": 453.716, + "args": { + "External id": 297435,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4058, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368774556.097, "dur": 436.741, + "args": { + "External id": 297436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368775025.905, "dur": 2.154, + "args": { + "External id": 297437,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4060, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775090.007, "dur": 6.822, + "args": { + "External id": 297438,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775140.046, "dur": 1.396, + "args": { + "External id": 297439,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775156.035, "dur": 1.423, + "args": { + "External id": 297440,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775185.562, "dur": 2.168, + "args": { + "External id": 297441,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775200.626, "dur": 0.824, + "args": { + "External id": 297442,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775211.635, "dur": 1.203, + "args": { + "External id": 297443,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775223.280, "dur": 1.146, + "args": { + "External id": 297444,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775234.745, "dur": 0.992, + "args": { + "External id": 297445,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775245.231, "dur": 1.022, + "args": { + "External id": 297446,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368775338.375, "dur": 2615.337, + "args": { + "External id": 297447,"Record function id": 0, "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2070547, "tid": 2107622, + "ts": 5333368775357.102, "dur": 982.072, + "args": { + "External id": 297448,"Record function id": 0, "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070547, "tid": 2107622, + "ts": 5333368775370.363, "dur": 329.836, + "args": { + "External id": 297449,"Record function id": 0, "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775445.446, "dur": 4.358, + "args": { + "External id": 297450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775452.798, "dur": 0.897, + "args": { + "External id": 297451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775455.492, "dur": 0.970, + "args": { + "External id": 297452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775458.019, "dur": 1.199, + "args": { + "External id": 297453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775460.412, "dur": 0.938, + "args": { + "External id": 297454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775462.864, "dur": 0.852, + "args": { + "External id": 297455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775465.428, "dur": 1.381, + "args": { + "External id": 297456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775468.059, "dur": 1.502, + "args": { + "External id": 297457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775471.396, "dur": 1.022, + "args": { + "External id": 297458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368775473.779, "dur": 0.837, + "args": { + "External id": 297459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368775491.690, "dur": 177.608, + "args": { + "External id": 297460,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368775506.793, "dur": 156.847, + "args": { + "External id": 297461,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368775519.788, "dur": 12.673, + "args": { + "External id": 297462,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368775536.311, "dur": 62.939, + "args": { + "External id": 297463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368775538.701, "dur": 60.298, + "args": { + "External id": 297464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775541.772, "dur": 5.835, + "args": { + "External id": 297465,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368775549.233, "dur": 49.193, + "args": { + "External id": 297466,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2070547, "tid": 2107622, + "ts": 5333368775777.410, "dur": 552.862, + "args": { + "External id": 297467,"Record function id": 0, "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070547, "tid": 2107622, + "ts": 5333368775793.602, "dur": 523.366, + "args": { + "External id": 297468,"Record function id": 0, "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368775851.059, "dur": 5.591, + "args": { + "External id": 297469,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368775871.422, "dur": 34.057, + "args": { + "External id": 297470,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775877.128, "dur": 2.096, + "args": { + "External id": 297471,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775880.621, "dur": 0.883, + "args": { + "External id": 297472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775892.065, "dur": 0.513, + "args": { + "External id": 297473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775893.771, "dur": 0.694, + "args": { + "External id": 297474,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775895.219, "dur": 0.595, + "args": { + "External id": 297475,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775897.147, "dur": 0.711, + "args": { + "External id": 297476,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775898.703, "dur": 0.639, + "args": { + "External id": 297477,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775900.485, "dur": 0.381, + "args": { + "External id": 297478,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368775902.142, "dur": 0.392, + "args": { + "External id": 297479,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368775915.742, "dur": 34.028, + "args": { + "External id": 297480,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368775979.833, "dur": 93.541, + "args": { + "External id": 297481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368775989.771, "dur": 3.231, + "args": { + "External id": 297482,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368775997.630, "dur": 9.160, + "args": { + "External id": 297483,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368776001.405, "dur": 4.990, + "args": { + "External id": 297484,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776004.429, "dur": 0.713, + "args": { + "External id": 297485,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368776013.469, "dur": 24.556, + "args": { + "External id": 297486,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776014.984, "dur": 0.727, + "args": { + "External id": 297487,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776017.010, "dur": 0.612, + "args": { + "External id": 297488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776018.848, "dur": 0.864, + "args": { + "External id": 297489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776020.818, "dur": 0.650, + "args": { + "External id": 297490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776022.473, "dur": 0.700, + "args": { + "External id": 297491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776024.224, "dur": 0.826, + "args": { + "External id": 297492,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776026.313, "dur": 0.891, + "args": { + "External id": 297493,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776028.231, "dur": 0.577, + "args": { + "External id": 297494,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368776030.260, "dur": 0.645, + "args": { + "External id": 297495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368776047.419, "dur": 19.015, + "args": { + "External id": 297496,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368776118.785, "dur": 130.016, + "args": { + "External id": 297497,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368776140.305, "dur": 104.823, + "args": { + "External id": 297498,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4121, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368776149.249, "dur": 91.388, + "args": { + "External id": 297499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368776264.632, "dur": 1.643, + "args": { + "External id": 297500,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4123, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368776345.981, "dur": 1587.087, + "args": { + "External id": 297501,"Sequence number": 1209170, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4124 + } + }, + { + "ph": "f", "id": 62, "pid": 2070547, "tid": 2107622, "ts": 5333368776345.981, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368776449.511, "dur": 103.677, + "args": { + "External id": 297502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368776591.871, "dur": 75.959, + "args": { + "External id": 297503,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368776688.800, "dur": 58.860, + "args": { + "External id": 297504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368776758.173, "dur": 35.357, + "args": { + "External id": 297505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368776799.892, "dur": 45.145, + "args": { + "External id": 297506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368776851.974, "dur": 26.603, + "args": { + "External id": 297507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368776885.892, "dur": 40.495, + "args": { + "External id": 297508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368776948.874, "dur": 25.217, + "args": { + "External id": 297509,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368776991.018, "dur": 27.399, + "args": { + "External id": 297510,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368777037.118, "dur": 18.570, + "args": { + "External id": 297511,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368777068.281, "dur": 19.516, + "args": { + "External id": 297512,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777094.998, "dur": 27.755, + "args": { + "External id": 297513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777125.632, "dur": 34.015, + "args": { + "External id": 297514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368777203.333, "dur": 169.007, + "args": { + "External id": 297515,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368777278.646, "dur": 5.980, + "args": { + "External id": 297516,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368777286.622, "dur": 2.509, + "args": { + "External id": 297517,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368777399.945, "dur": 26.534, + "args": { + "External id": 297518,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368777436.825, "dur": 15.107, + "args": { + "External id": 297519,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777459.918, "dur": 40.726, + "args": { + "External id": 297520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777506.607, "dur": 33.494, + "args": { + "External id": 297521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777546.397, "dur": 21.407, + "args": { + "External id": 297522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777571.844, "dur": 28.560, + "args": { + "External id": 297523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777605.422, "dur": 56.998, + "args": { + "External id": 297524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368777672.861, "dur": 34.285, + "args": { + "External id": 297525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368777724.014, "dur": 24.945, + "args": { + "External id": 297526,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368777764.204, "dur": 22.831, + "args": { + "External id": 297527,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368777818.235, "dur": 24.523, + "args": { + "External id": 297528,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368777858.066, "dur": 14.768, + "args": { + "External id": 297529,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368777883.533, "dur": 17.192, + "args": { + "External id": 297530,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368777976.397, "dur": 17.378, + "args": { + "External id": 297531,"Record function id": 0, "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368777979.616, "dur": 13.240, + "args": { + "External id": 297532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368777983.715, "dur": 8.277, + "args": { + "External id": 297533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368777987.991, "dur": 3.914, + "args": { + "External id": 297534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368777997.795, "dur": 4.476, + "args": { + "External id": 297535,"Record function id": 0, "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368777999.304, "dur": 2.510, + "args": { + "External id": 297536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778000.193, "dur": 1.146, + "args": { + "External id": 297537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778000.561, "dur": 0.708, + "args": { + "External id": 297538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778005.624, "dur": 3.676, + "args": { + "External id": 297539,"Record function id": 0, "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778006.659, "dur": 2.194, + "args": { + "External id": 297540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778007.238, "dur": 1.151, + "args": { + "External id": 297541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778007.606, "dur": 0.695, + "args": { + "External id": 297542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778012.452, "dur": 3.448, + "args": { + "External id": 297543,"Record function id": 0, "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778013.561, "dur": 1.937, + "args": { + "External id": 297544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778014.073, "dur": 0.983, + "args": { + "External id": 297545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778014.362, "dur": 0.607, + "args": { + "External id": 297546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778018.997, "dur": 3.725, + "args": { + "External id": 297547,"Record function id": 0, "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778020.237, "dur": 2.043, + "args": { + "External id": 297548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778020.712, "dur": 1.126, + "args": { + "External id": 297549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778021.097, "dur": 0.667, + "args": { + "External id": 297550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778025.830, "dur": 3.414, + "args": { + "External id": 297551,"Record function id": 0, "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778026.906, "dur": 1.902, + "args": { + "External id": 297552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778027.404, "dur": 0.987, + "args": { + "External id": 297553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778027.797, "dur": 0.526, + "args": { + "External id": 297554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778032.482, "dur": 3.594, + "args": { + "External id": 297555,"Record function id": 0, "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778033.480, "dur": 2.182, + "args": { + "External id": 297556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778034.011, "dur": 1.207, + "args": { + "External id": 297557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778034.545, "dur": 0.576, + "args": { + "External id": 297558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778039.284, "dur": 3.498, + "args": { + "External id": 297559,"Record function id": 0, "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778040.347, "dur": 2.033, + "args": { + "External id": 297560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778040.846, "dur": 1.085, + "args": { + "External id": 297561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778041.171, "dur": 0.687, + "args": { + "External id": 297562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778048.801, "dur": 3.342, + "args": { + "External id": 297563,"Record function id": 0, "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368778049.854, "dur": 1.876, + "args": { + "External id": 297564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778050.321, "dur": 0.975, + "args": { + "External id": 297565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368778050.676, "dur": 0.544, + "args": { + "External id": 297566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368778055.808, "dur": 35789.438, + "args": { + "External id": 297567,"Record function id": 0, "Sequence number": 1209169, "Fwd thread id": 1, "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368778057.042, "dur": 35779.765, + "args": { + "External id": 297568,"Sequence number": 1209169, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4191 + } + }, + { + "ph": "f", "id": 63, "pid": 2070547, "tid": 2107622, "ts": 5333368778057.042, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2070547, "tid": 2107622, + "ts": 5333368778085.135, "dur": 40.344, + "args": { + "External id": 297569,"Record function id": 0, "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2070547, "tid": 2107622, + "ts": 5333368778133.829, "dur": 80.663, + "args": { + "External id": 297570,"Record function id": 0, "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2070547, "tid": 2107622, + "ts": 5333368778222.164, "dur": 35605.849, + "args": { + "External id": 297571,"Record function id": 0, "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368778312.090, "dur": 6.811, + "args": { + "External id": 297572,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368778328.950, "dur": 4.946, + "args": { + "External id": 297573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368778348.435, "dur": 34664.910, + "args": { + "External id": 297574,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368778361.127, "dur": 34643.897, + "args": { + "External id": 297575,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368778410.262, "dur": 13.961, + "args": { + "External id": 297576,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368778433.447, "dur": 34534.744, + "args": { + "External id": 297577,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368778435.748, "dur": 34531.772, + "args": { + "External id": 297578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368778439.310, "dur": 5.299, + "args": { + "External id": 297579,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368778446.181, "dur": 34517.871, + "args": { + "External id": 297580,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368813096.486, "dur": 8.928, + "args": { + "External id": 297581,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368813099.017, "dur": 6.017, + "args": { + "External id": 297582,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368813132.612, "dur": 391.065, + "args": { + "External id": 297583,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368813154.847, "dur": 364.017, + "args": { + "External id": 297584,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4207, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368813175.531, "dur": 337.420, + "args": { + "External id": 297585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368813541.594, "dur": 2.161, + "args": { + "External id": 297586,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4209, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813603.461, "dur": 6.307, + "args": { + "External id": 297587,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813682.356, "dur": 2.100, + "args": { + "External id": 297588,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813700.343, "dur": 1.367, + "args": { + "External id": 297589,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813712.571, "dur": 1.031, + "args": { + "External id": 297590,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813723.514, "dur": 1.004, + "args": { + "External id": 297591,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813733.276, "dur": 0.730, + "args": { + "External id": 297592,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813743.138, "dur": 1.297, + "args": { + "External id": 297593,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813753.971, "dur": 1.084, + "args": { + "External id": 297594,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368813766.061, "dur": 1.090, + "args": { + "External id": 297595,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368813861.137, "dur": 2560.452, + "args": { + "External id": 297596,"Record function id": 0, "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2070547, "tid": 2107622, + "ts": 5333368813880.557, "dur": 937.049, + "args": { + "External id": 297597,"Record function id": 0, "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070547, "tid": 2107622, + "ts": 5333368813894.410, "dur": 303.056, + "args": { + "External id": 297598,"Record function id": 0, "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813969.707, "dur": 3.921, + "args": { + "External id": 297599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813976.698, "dur": 0.675, + "args": { + "External id": 297600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813978.946, "dur": 0.690, + "args": { + "External id": 297601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813981.172, "dur": 0.629, + "args": { + "External id": 297602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813983.493, "dur": 0.730, + "args": { + "External id": 297603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813985.401, "dur": 0.726, + "args": { + "External id": 297604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813987.565, "dur": 0.693, + "args": { + "External id": 297605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813989.498, "dur": 0.551, + "args": { + "External id": 297606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813991.631, "dur": 1.212, + "args": { + "External id": 297607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368813994.309, "dur": 1.315, + "args": { + "External id": 297608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368814013.178, "dur": 139.243, + "args": { + "External id": 297609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368814027.927, "dur": 120.073, + "args": { + "External id": 297610,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368814040.415, "dur": 13.028, + "args": { + "External id": 297611,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368814057.079, "dur": 63.358, + "args": { + "External id": 297612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368814059.305, "dur": 60.803, + "args": { + "External id": 297613,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814062.497, "dur": 5.163, + "args": { + "External id": 297614,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368814069.255, "dur": 50.404, + "args": { + "External id": 297615,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2070547, "tid": 2107622, + "ts": 5333368814275.889, "dur": 534.092, + "args": { + "External id": 297616,"Record function id": 0, "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070547, "tid": 2107622, + "ts": 5333368814291.079, "dur": 506.257, + "args": { + "External id": 297617,"Record function id": 0, "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368814350.711, "dur": 6.048, + "args": { + "External id": 297618,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368814372.044, "dur": 17.829, + "args": { + "External id": 297619,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814376.137, "dur": 1.624, + "args": { + "External id": 297620,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814379.210, "dur": 0.494, + "args": { + "External id": 297621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814380.417, "dur": 0.343, + "args": { + "External id": 297622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814381.359, "dur": 0.320, + "args": { + "External id": 297623,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814382.424, "dur": 0.290, + "args": { + "External id": 297624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814383.503, "dur": 0.307, + "args": { + "External id": 297625,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814384.562, "dur": 0.433, + "args": { + "External id": 297626,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814385.910, "dur": 0.208, + "args": { + "External id": 297627,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814386.662, "dur": 0.429, + "args": { + "External id": 297628,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368814399.329, "dur": 32.432, + "args": { + "External id": 297629,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368814460.392, "dur": 85.427, + "args": { + "External id": 297630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368814469.621, "dur": 2.973, + "args": { + "External id": 297631,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368814477.243, "dur": 8.559, + "args": { + "External id": 297632,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368814481.030, "dur": 4.360, + "args": { + "External id": 297633,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814483.847, "dur": 0.436, + "args": { + "External id": 297634,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368814491.903, "dur": 18.712, + "args": { + "External id": 297635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814493.249, "dur": 0.419, + "args": { + "External id": 297636,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814494.551, "dur": 0.247, + "args": { + "External id": 297637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814495.641, "dur": 0.503, + "args": { + "External id": 297638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814497.116, "dur": 0.251, + "args": { + "External id": 297639,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814498.159, "dur": 0.253, + "args": { + "External id": 297640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814499.598, "dur": 0.478, + "args": { + "External id": 297641,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814501.216, "dur": 0.216, + "args": { + "External id": 297642,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814502.342, "dur": 0.320, + "args": { + "External id": 297643,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368814503.562, "dur": 0.265, + "args": { + "External id": 297644,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368814518.794, "dur": 20.103, + "args": { + "External id": 297645,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368814585.286, "dur": 146.715, + "args": { + "External id": 297646,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368814605.429, "dur": 122.969, + "args": { + "External id": 297647,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4270, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368814613.658, "dur": 110.574, + "args": { + "External id": 297648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368814745.683, "dur": 1.766, + "args": { + "External id": 297649,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4272, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368814823.800, "dur": 1576.476, + "args": { + "External id": 297650,"Sequence number": 1209168, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4273 + } + }, + { + "ph": "f", "id": 64, "pid": 2070547, "tid": 2107622, "ts": 5333368814823.800, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368814929.785, "dur": 100.065, + "args": { + "External id": 297651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368815063.967, "dur": 36.122, + "args": { + "External id": 297652,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815114.790, "dur": 46.829, + "args": { + "External id": 297653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815192.444, "dur": 36.858, + "args": { + "External id": 297654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815235.774, "dur": 45.516, + "args": { + "External id": 297655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815286.730, "dur": 26.859, + "args": { + "External id": 297656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815320.028, "dur": 41.383, + "args": { + "External id": 297657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368815383.619, "dur": 23.804, + "args": { + "External id": 297658,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368815424.070, "dur": 28.136, + "args": { + "External id": 297659,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368815472.159, "dur": 18.552, + "args": { + "External id": 297660,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368815503.604, "dur": 15.500, + "args": { + "External id": 297661,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815529.429, "dur": 28.399, + "args": { + "External id": 297662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815561.099, "dur": 34.350, + "args": { + "External id": 297663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368815667.506, "dur": 174.896, + "args": { + "External id": 297664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368815747.114, "dur": 6.609, + "args": { + "External id": 297665,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368815756.137, "dur": 3.246, + "args": { + "External id": 297666,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368815874.399, "dur": 24.858, + "args": { + "External id": 297667,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368815910.804, "dur": 14.671, + "args": { + "External id": 297668,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815933.803, "dur": 41.136, + "args": { + "External id": 297669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368815980.880, "dur": 37.476, + "args": { + "External id": 297670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368816024.877, "dur": 22.904, + "args": { + "External id": 297671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368816055.836, "dur": 29.033, + "args": { + "External id": 297672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368816091.330, "dur": 23.970, + "args": { + "External id": 297673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368816121.682, "dur": 28.370, + "args": { + "External id": 297674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368816182.753, "dur": 24.679, + "args": { + "External id": 297675,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368816226.773, "dur": 23.554, + "args": { + "External id": 297676,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368816265.959, "dur": 17.368, + "args": { + "External id": 297677,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368816298.462, "dur": 26.766, + "args": { + "External id": 297678,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368816347.141, "dur": 23.939, + "args": { + "External id": 297679,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816443.440, "dur": 17.232, + "args": { + "External id": 297680,"Record function id": 0, "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816446.437, "dur": 13.320, + "args": { + "External id": 297681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816450.287, "dur": 8.497, + "args": { + "External id": 297682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816454.574, "dur": 4.093, + "args": { + "External id": 297683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816464.563, "dur": 4.344, + "args": { + "External id": 297684,"Record function id": 0, "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816466.125, "dur": 2.332, + "args": { + "External id": 297685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816466.726, "dur": 1.268, + "args": { + "External id": 297686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816467.074, "dur": 0.842, + "args": { + "External id": 297687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816472.096, "dur": 4.288, + "args": { + "External id": 297688,"Record function id": 0, "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816473.452, "dur": 2.520, + "args": { + "External id": 297689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816474.006, "dur": 1.541, + "args": { + "External id": 297690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816474.661, "dur": 0.777, + "args": { + "External id": 297691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816479.594, "dur": 3.522, + "args": { + "External id": 297692,"Record function id": 0, "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816480.463, "dur": 2.253, + "args": { + "External id": 297693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816480.949, "dur": 1.316, + "args": { + "External id": 297694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816481.429, "dur": 0.752, + "args": { + "External id": 297695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816486.305, "dur": 4.097, + "args": { + "External id": 297696,"Record function id": 0, "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816487.598, "dur": 2.374, + "args": { + "External id": 297697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816488.290, "dur": 1.160, + "args": { + "External id": 297698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816488.563, "dur": 0.809, + "args": { + "External id": 297699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816493.545, "dur": 4.162, + "args": { + "External id": 297700,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816494.691, "dur": 2.632, + "args": { + "External id": 297701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816495.196, "dur": 1.676, + "args": { + "External id": 297702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816495.698, "dur": 1.107, + "args": { + "External id": 297703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816500.898, "dur": 3.767, + "args": { + "External id": 297704,"Record function id": 0, "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816502.011, "dur": 2.254, + "args": { + "External id": 297705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816502.465, "dur": 1.356, + "args": { + "External id": 297706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816502.837, "dur": 0.909, + "args": { + "External id": 297707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816507.781, "dur": 6.600, + "args": { + "External id": 297708,"Record function id": 0, "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816511.777, "dur": 2.197, + "args": { + "External id": 297709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816512.246, "dur": 1.288, + "args": { + "External id": 297710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816512.563, "dur": 0.895, + "args": { + "External id": 297711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816517.470, "dur": 3.548, + "args": { + "External id": 297712,"Record function id": 0, "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368816518.463, "dur": 2.148, + "args": { + "External id": 297713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816518.960, "dur": 1.226, + "args": { + "External id": 297714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368816519.290, "dur": 0.820, + "args": { + "External id": 297715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368816524.969, "dur": 36770.953, + "args": { + "External id": 297716,"Record function id": 0, "Sequence number": 1209167, "Fwd thread id": 1, "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368816526.219, "dur": 36760.397, + "args": { + "External id": 297717,"Sequence number": 1209167, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4340 + } + }, + { + "ph": "f", "id": 65, "pid": 2070547, "tid": 2107622, "ts": 5333368816526.219, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2070547, "tid": 2107622, + "ts": 5333368816554.303, "dur": 36.623, + "args": { + "External id": 297718,"Record function id": 0, "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2070547, "tid": 2107622, + "ts": 5333368816598.537, "dur": 102.934, + "args": { + "External id": 297719,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2070547, "tid": 2107622, + "ts": 5333368816709.963, "dur": 36568.220, + "args": { + "External id": 297720,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368816798.228, "dur": 7.332, + "args": { + "External id": 297721,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368816815.436, "dur": 5.041, + "args": { + "External id": 297722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368816833.478, "dur": 35632.304, + "args": { + "External id": 297723,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368816846.231, "dur": 35611.738, + "args": { + "External id": 297724,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368816887.052, "dur": 13.696, + "args": { + "External id": 297725,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368816906.844, "dur": 35511.996, + "args": { + "External id": 297726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368816909.439, "dur": 35508.742, + "args": { + "External id": 297727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368816912.715, "dur": 4.857, + "args": { + "External id": 297728,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368816919.208, "dur": 35495.713, + "args": { + "External id": 297729,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368852549.331, "dur": 9.174, + "args": { + "External id": 297730,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368852551.917, "dur": 6.210, + "args": { + "External id": 297731,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368852585.908, "dur": 399.289, + "args": { + "External id": 297732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368852607.901, "dur": 372.176, + "args": { + "External id": 297733,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4356, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368852618.509, "dur": 356.120, + "args": { + "External id": 297734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368853004.249, "dur": 2.233, + "args": { + "External id": 297735,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4358, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853066.760, "dur": 6.456, + "args": { + "External id": 297736,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853115.809, "dur": 1.599, + "args": { + "External id": 297737,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853131.811, "dur": 1.300, + "args": { + "External id": 297738,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853144.692, "dur": 1.200, + "args": { + "External id": 297739,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853155.500, "dur": 1.012, + "args": { + "External id": 297740,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853178.099, "dur": 2.005, + "args": { + "External id": 297741,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853195.683, "dur": 1.699, + "args": { + "External id": 297742,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853207.633, "dur": 1.079, + "args": { + "External id": 297743,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853216.956, "dur": 1.011, + "args": { + "External id": 297744,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368853311.156, "dur": 2623.285, + "args": { + "External id": 297745,"Record function id": 0, "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2070547, "tid": 2107622, + "ts": 5333368853329.058, "dur": 966.304, + "args": { + "External id": 297746,"Record function id": 0, "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070547, "tid": 2107622, + "ts": 5333368853343.200, "dur": 323.489, + "args": { + "External id": 297747,"Record function id": 0, "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853417.095, "dur": 3.878, + "args": { + "External id": 297748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853424.042, "dur": 1.118, + "args": { + "External id": 297749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853426.901, "dur": 0.953, + "args": { + "External id": 297750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853429.570, "dur": 0.864, + "args": { + "External id": 297751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853432.076, "dur": 1.088, + "args": { + "External id": 297752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853434.635, "dur": 1.178, + "args": { + "External id": 297753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853437.357, "dur": 1.011, + "args": { + "External id": 297754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853439.585, "dur": 0.975, + "args": { + "External id": 297755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853441.964, "dur": 1.177, + "args": { + "External id": 297756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368853444.519, "dur": 1.011, + "args": { + "External id": 297757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368853461.982, "dur": 136.981, + "args": { + "External id": 297758,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368853477.445, "dur": 117.446, + "args": { + "External id": 297759,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368853489.450, "dur": 12.592, + "args": { + "External id": 297760,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368853505.934, "dur": 62.253, + "args": { + "External id": 297761,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368853508.025, "dur": 59.687, + "args": { + "External id": 297762,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853511.052, "dur": 5.744, + "args": { + "External id": 297763,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368853518.429, "dur": 48.673, + "args": { + "External id": 297764,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2070547, "tid": 2107622, + "ts": 5333368853747.923, "dur": 539.697, + "args": { + "External id": 297765,"Record function id": 0, "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070547, "tid": 2107622, + "ts": 5333368853764.174, "dur": 510.418, + "args": { + "External id": 297766,"Record function id": 0, "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368853823.459, "dur": 6.177, + "args": { + "External id": 297767,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368853844.482, "dur": 24.743, + "args": { + "External id": 297768,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853849.040, "dur": 1.791, + "args": { + "External id": 297769,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853852.245, "dur": 0.700, + "args": { + "External id": 297770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853854.302, "dur": 0.571, + "args": { + "External id": 297771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853855.710, "dur": 0.795, + "args": { + "External id": 297772,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853857.469, "dur": 0.466, + "args": { + "External id": 297773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853859.316, "dur": 0.376, + "args": { + "External id": 297774,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853860.945, "dur": 0.501, + "args": { + "External id": 297775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853862.998, "dur": 0.600, + "args": { + "External id": 297776,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853864.359, "dur": 0.765, + "args": { + "External id": 297777,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368853880.577, "dur": 35.331, + "args": { + "External id": 297778,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368853944.247, "dur": 96.837, + "args": { + "External id": 297779,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368853953.846, "dur": 3.175, + "args": { + "External id": 297780,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368853961.795, "dur": 9.650, + "args": { + "External id": 297781,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368853965.827, "dur": 5.184, + "args": { + "External id": 297782,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853968.815, "dur": 0.461, + "args": { + "External id": 297783,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368853977.876, "dur": 25.916, + "args": { + "External id": 297784,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853979.418, "dur": 0.769, + "args": { + "External id": 297785,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853981.944, "dur": 0.416, + "args": { + "External id": 297786,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853983.581, "dur": 0.431, + "args": { + "External id": 297787,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853985.497, "dur": 0.717, + "args": { + "External id": 297788,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853987.150, "dur": 0.938, + "args": { + "External id": 297789,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853989.580, "dur": 0.424, + "args": { + "External id": 297790,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853991.577, "dur": 0.784, + "args": { + "External id": 297791,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853993.422, "dur": 0.722, + "args": { + "External id": 297792,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368853995.497, "dur": 0.495, + "args": { + "External id": 297793,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368854013.166, "dur": 20.341, + "args": { + "External id": 297794,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368854081.871, "dur": 125.872, + "args": { + "External id": 297795,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368854103.836, "dur": 100.319, + "args": { + "External id": 297796,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4419, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368854112.592, "dur": 86.816, + "args": { + "External id": 297797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368854222.531, "dur": 1.954, + "args": { + "External id": 297798,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4421, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368854302.797, "dur": 1611.057, + "args": { + "External id": 297799,"Sequence number": 1209166, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4422 + } + }, + { + "ph": "f", "id": 66, "pid": 2070547, "tid": 2107622, "ts": 5333368854302.797, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368854408.238, "dur": 105.790, + "args": { + "External id": 297800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368854547.820, "dur": 37.040, + "args": { + "External id": 297801,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368854601.271, "dur": 102.083, + "args": { + "External id": 297802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368854718.089, "dur": 42.254, + "args": { + "External id": 297803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368854766.654, "dur": 44.682, + "args": { + "External id": 297804,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368854818.421, "dur": 27.630, + "args": { + "External id": 297805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368854853.202, "dur": 41.853, + "args": { + "External id": 297806,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368854917.921, "dur": 25.923, + "args": { + "External id": 297807,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368854961.331, "dur": 27.840, + "args": { + "External id": 297808,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368855010.419, "dur": 19.847, + "args": { + "External id": 297809,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368855043.166, "dur": 15.762, + "args": { + "External id": 297810,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855067.179, "dur": 28.506, + "args": { + "External id": 297811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855098.848, "dur": 31.692, + "args": { + "External id": 297812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368855156.959, "dur": 186.509, + "args": { + "External id": 297813,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368855249.055, "dur": 6.744, + "args": { + "External id": 297814,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368855257.713, "dur": 3.743, + "args": { + "External id": 297815,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368855371.846, "dur": 24.197, + "args": { + "External id": 297816,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368855406.557, "dur": 16.217, + "args": { + "External id": 297817,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855430.195, "dur": 36.382, + "args": { + "External id": 297818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855472.273, "dur": 35.016, + "args": { + "External id": 297819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855513.746, "dur": 20.789, + "args": { + "External id": 297820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855539.908, "dur": 28.603, + "args": { + "External id": 297821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855574.381, "dur": 23.113, + "args": { + "External id": 297822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368855604.425, "dur": 69.349, + "args": { + "External id": 297823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368855697.193, "dur": 23.472, + "args": { + "External id": 297824,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368855737.049, "dur": 25.473, + "args": { + "External id": 297825,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368855778.215, "dur": 34.386, + "args": { + "External id": 297826,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368855837.242, "dur": 16.314, + "args": { + "External id": 297827,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368855867.307, "dur": 18.085, + "args": { + "External id": 297828,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855958.893, "dur": 14.307, + "args": { + "External id": 297829,"Record function id": 0, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855961.887, "dur": 10.373, + "args": { + "External id": 297830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855965.915, "dur": 5.514, + "args": { + "External id": 297831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855967.063, "dur": 4.275, + "args": { + "External id": 297832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855976.975, "dur": 4.557, + "args": { + "External id": 297833,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855978.279, "dur": 2.841, + "args": { + "External id": 297834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855979.389, "dur": 1.256, + "args": { + "External id": 297835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855979.699, "dur": 0.867, + "args": { + "External id": 297836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855984.802, "dur": 3.980, + "args": { + "External id": 297837,"Record function id": 0, "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855985.852, "dur": 2.509, + "args": { + "External id": 297838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855986.375, "dur": 1.536, + "args": { + "External id": 297839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855986.792, "dur": 1.037, + "args": { + "External id": 297840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855992.048, "dur": 3.843, + "args": { + "External id": 297841,"Record function id": 0, "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855993.135, "dur": 2.347, + "args": { + "External id": 297842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855993.757, "dur": 1.309, + "args": { + "External id": 297843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368855994.304, "dur": 0.687, + "args": { + "External id": 297844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368855998.991, "dur": 4.087, + "args": { + "External id": 297845,"Record function id": 0, "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856000.203, "dur": 2.465, + "args": { + "External id": 297846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856000.678, "dur": 1.461, + "args": { + "External id": 297847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856001.108, "dur": 0.971, + "args": { + "External id": 297848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856009.479, "dur": 4.324, + "args": { + "External id": 297849,"Record function id": 0, "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856010.784, "dur": 2.622, + "args": { + "External id": 297850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856011.646, "dur": 1.353, + "args": { + "External id": 297851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856012.100, "dur": 0.831, + "args": { + "External id": 297852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856017.086, "dur": 3.513, + "args": { + "External id": 297853,"Record function id": 0, "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856018.117, "dur": 2.059, + "args": { + "External id": 297854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856018.605, "dur": 1.139, + "args": { + "External id": 297855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856018.891, "dur": 0.780, + "args": { + "External id": 297856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856023.608, "dur": 4.120, + "args": { + "External id": 297857,"Record function id": 0, "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856024.796, "dur": 2.529, + "args": { + "External id": 297858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856025.443, "dur": 1.465, + "args": { + "External id": 297859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856025.742, "dur": 1.096, + "args": { + "External id": 297860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856030.842, "dur": 4.013, + "args": { + "External id": 297861,"Record function id": 0, "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368856031.868, "dur": 2.571, + "args": { + "External id": 297862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856032.366, "dur": 1.635, + "args": { + "External id": 297863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368856032.665, "dur": 1.271, + "args": { + "External id": 297864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368856038.714, "dur": 36463.665, + "args": { + "External id": 297865,"Record function id": 0, "Sequence number": 1209165, "Fwd thread id": 1, "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368856040.030, "dur": 36453.538, + "args": { + "External id": 297866,"Sequence number": 1209165, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4489 + } + }, + { + "ph": "f", "id": 67, "pid": 2070547, "tid": 2107622, "ts": 5333368856040.030, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2070547, "tid": 2107622, + "ts": 5333368856067.566, "dur": 34.993, + "args": { + "External id": 297867,"Record function id": 0, "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2070547, "tid": 2107622, + "ts": 5333368856109.951, "dur": 77.885, + "args": { + "External id": 297868,"Record function id": 0, "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2070547, "tid": 2107622, + "ts": 5333368856195.029, "dur": 36290.881, + "args": { + "External id": 297869,"Record function id": 0, "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368856283.125, "dur": 6.687, + "args": { + "External id": 297870,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368856299.161, "dur": 4.736, + "args": { + "External id": 297871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368856317.248, "dur": 35331.706, + "args": { + "External id": 297872,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368856330.246, "dur": 35286.012, + "args": { + "External id": 297873,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368856370.216, "dur": 17.398, + "args": { + "External id": 297874,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368856393.493, "dur": 35185.432, + "args": { + "External id": 297875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368856395.806, "dur": 35182.319, + "args": { + "External id": 297876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368856399.399, "dur": 5.634, + "args": { + "External id": 297877,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368856406.584, "dur": 35167.792, + "args": { + "External id": 297878,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368891740.472, "dur": 10.046, + "args": { + "External id": 297879,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368891743.270, "dur": 6.835, + "args": { + "External id": 297880,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368891777.164, "dur": 430.945, + "args": { + "External id": 297881,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368891802.066, "dur": 400.687, + "args": { + "External id": 297882,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4505, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368891812.586, "dur": 383.916, + "args": { + "External id": 297883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368892228.813, "dur": 2.221, + "args": { + "External id": 297884,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4507, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892291.850, "dur": 6.747, + "args": { + "External id": 297885,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892340.570, "dur": 1.410, + "args": { + "External id": 297886,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892356.494, "dur": 1.185, + "args": { + "External id": 297887,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892370.301, "dur": 1.113, + "args": { + "External id": 297888,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892381.531, "dur": 1.586, + "args": { + "External id": 297889,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892392.867, "dur": 0.967, + "args": { + "External id": 297890,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892404.809, "dur": 1.255, + "args": { + "External id": 297891,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892416.131, "dur": 1.245, + "args": { + "External id": 297892,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892426.812, "dur": 1.007, + "args": { + "External id": 297893,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368892516.535, "dur": 2607.495, + "args": { + "External id": 297894,"Record function id": 0, "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2070547, "tid": 2107622, + "ts": 5333368892535.452, "dur": 973.648, + "args": { + "External id": 297895,"Record function id": 0, "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070547, "tid": 2107622, + "ts": 5333368892548.331, "dur": 335.500, + "args": { + "External id": 297896,"Record function id": 0, "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892661.922, "dur": 5.114, + "args": { + "External id": 297897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892670.744, "dur": 1.072, + "args": { + "External id": 297898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892673.569, "dur": 1.149, + "args": { + "External id": 297899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892676.441, "dur": 1.126, + "args": { + "External id": 297900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892679.077, "dur": 1.391, + "args": { + "External id": 297901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892682.038, "dur": 1.805, + "args": { + "External id": 297902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892685.525, "dur": 1.569, + "args": { + "External id": 297903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892688.571, "dur": 1.346, + "args": { + "External id": 297904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892691.336, "dur": 1.509, + "args": { + "External id": 297905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368892694.356, "dur": 1.447, + "args": { + "External id": 297906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368892713.604, "dur": 140.815, + "args": { + "External id": 297907,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368892729.444, "dur": 120.518, + "args": { + "External id": 297908,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368892741.353, "dur": 12.322, + "args": { + "External id": 297909,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368892757.739, "dur": 64.300, + "args": { + "External id": 297910,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368892760.326, "dur": 61.397, + "args": { + "External id": 297911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368892763.608, "dur": 6.077, + "args": { + "External id": 297912,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368892771.228, "dur": 50.087, + "args": { + "External id": 297913,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2070547, "tid": 2107622, + "ts": 5333368892965.625, "dur": 535.985, + "args": { + "External id": 297914,"Record function id": 0, "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070547, "tid": 2107622, + "ts": 5333368892980.875, "dur": 508.351, + "args": { + "External id": 297915,"Record function id": 0, "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368893039.586, "dur": 4.940, + "args": { + "External id": 297916,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368893059.587, "dur": 24.488, + "args": { + "External id": 297917,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893063.759, "dur": 1.459, + "args": { + "External id": 297918,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893066.678, "dur": 0.321, + "args": { + "External id": 297919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893068.159, "dur": 0.438, + "args": { + "External id": 297920,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893069.598, "dur": 0.534, + "args": { + "External id": 297921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893071.317, "dur": 0.713, + "args": { + "External id": 297922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893073.663, "dur": 0.496, + "args": { + "External id": 297923,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893075.337, "dur": 0.889, + "args": { + "External id": 297924,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893077.247, "dur": 0.697, + "args": { + "External id": 297925,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893079.841, "dur": 0.658, + "args": { + "External id": 297926,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368893093.518, "dur": 29.703, + "args": { + "External id": 297927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2107622, + "ts": 5333368893152.433, "dur": 120.675, + "args": { + "External id": 297928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368893161.096, "dur": 2.816, + "args": { + "External id": 297929,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2107622, + "ts": 5333368893184.282, "dur": 11.462, + "args": { + "External id": 297930,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2107622, + "ts": 5333368893188.151, "dur": 7.187, + "args": { + "External id": 297931,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893192.292, "dur": 1.012, + "args": { + "External id": 297932,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2107622, + "ts": 5333368893203.364, "dur": 25.877, + "args": { + "External id": 297933,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893205.304, "dur": 0.866, + "args": { + "External id": 297934,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893207.779, "dur": 0.389, + "args": { + "External id": 297935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893210.079, "dur": 0.868, + "args": { + "External id": 297936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893211.942, "dur": 0.667, + "args": { + "External id": 297937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893214.501, "dur": 0.804, + "args": { + "External id": 297938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893216.315, "dur": 0.503, + "args": { + "External id": 297939,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893217.569, "dur": 0.580, + "args": { + "External id": 297940,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893219.555, "dur": 0.712, + "args": { + "External id": 297941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368893221.475, "dur": 0.481, + "args": { + "External id": 297942,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368893242.800, "dur": 22.668, + "args": { + "External id": 297943,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368893316.275, "dur": 107.121, + "args": { + "External id": 297944,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368893337.427, "dur": 82.900, + "args": { + "External id": 297945,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4568, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368893345.796, "dur": 69.131, + "args": { + "External id": 297946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368893437.183, "dur": 1.771, + "args": { + "External id": 297947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4570, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368893516.442, "dur": 1583.871, + "args": { + "External id": 297948,"Sequence number": 1209164, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4571 + } + }, + { + "ph": "f", "id": 68, "pid": 2070547, "tid": 2107622, "ts": 5333368893516.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368893663.802, "dur": 105.889, + "args": { + "External id": 297949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368893808.361, "dur": 40.052, + "args": { + "External id": 297950,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368893865.428, "dur": 48.266, + "args": { + "External id": 297951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368893922.919, "dur": 30.671, + "args": { + "External id": 297952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368893959.951, "dur": 44.146, + "args": { + "External id": 297953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894010.869, "dur": 27.022, + "args": { + "External id": 297954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894044.540, "dur": 40.647, + "args": { + "External id": 297955,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368894105.654, "dur": 21.783, + "args": { + "External id": 297956,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368894144.678, "dur": 45.250, + "args": { + "External id": 297957,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368894213.651, "dur": 20.264, + "args": { + "External id": 297958,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368894246.606, "dur": 16.023, + "args": { + "External id": 297959,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894270.405, "dur": 31.992, + "args": { + "External id": 297960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894305.574, "dur": 32.687, + "args": { + "External id": 297961,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368894363.837, "dur": 166.362, + "args": { + "External id": 297962,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368894438.072, "dur": 6.070, + "args": { + "External id": 297963,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368894446.018, "dur": 3.088, + "args": { + "External id": 297964,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368894561.918, "dur": 23.839, + "args": { + "External id": 297965,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368894597.116, "dur": 16.049, + "args": { + "External id": 297966,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894656.956, "dur": 42.902, + "args": { + "External id": 297967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894707.051, "dur": 34.781, + "args": { + "External id": 297968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894749.559, "dur": 24.414, + "args": { + "External id": 297969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894778.586, "dur": 30.187, + "args": { + "External id": 297970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894814.398, "dur": 23.124, + "args": { + "External id": 297971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368894844.464, "dur": 28.723, + "args": { + "External id": 297972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368894891.903, "dur": 21.381, + "args": { + "External id": 297973,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368894931.366, "dur": 22.514, + "args": { + "External id": 297974,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368894969.492, "dur": 29.679, + "args": { + "External id": 297975,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368895023.415, "dur": 18.560, + "args": { + "External id": 297976,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368895054.335, "dur": 17.940, + "args": { + "External id": 297977,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895145.900, "dur": 14.901, + "args": { + "External id": 297978,"Record function id": 0, "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895149.068, "dur": 10.742, + "args": { + "External id": 297979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895152.642, "dur": 6.272, + "args": { + "External id": 297980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895154.039, "dur": 4.749, + "args": { + "External id": 297981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895184.203, "dur": 18.547, + "args": { + "External id": 297982,"Record function id": 0, "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895187.341, "dur": 14.646, + "args": { + "External id": 297983,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895188.496, "dur": 12.636, + "args": { + "External id": 297984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895199.247, "dur": 1.629, + "args": { + "External id": 297985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895207.458, "dur": 4.233, + "args": { + "External id": 297986,"Record function id": 0, "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895208.784, "dur": 2.486, + "args": { + "External id": 297987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895209.404, "dur": 1.392, + "args": { + "External id": 297988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895209.777, "dur": 0.928, + "args": { + "External id": 297989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895214.860, "dur": 3.839, + "args": { + "External id": 297990,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895215.967, "dur": 2.333, + "args": { + "External id": 297991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895216.453, "dur": 1.426, + "args": { + "External id": 297992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895217.071, "dur": 0.728, + "args": { + "External id": 297993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895221.823, "dur": 7.236, + "args": { + "External id": 297994,"Record function id": 0, "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895223.200, "dur": 5.442, + "args": { + "External id": 297995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895223.874, "dur": 4.369, + "args": { + "External id": 297996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895227.226, "dur": 0.950, + "args": { + "External id": 297997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895232.107, "dur": 3.399, + "args": { + "External id": 297998,"Record function id": 0, "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895233.064, "dur": 2.015, + "args": { + "External id": 297999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895233.510, "dur": 1.163, + "args": { + "External id": 298000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895233.889, "dur": 0.713, + "args": { + "External id": 298001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895238.694, "dur": 3.951, + "args": { + "External id": 298002,"Record function id": 0, "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895239.925, "dur": 2.317, + "args": { + "External id": 298003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895240.481, "dur": 1.362, + "args": { + "External id": 298004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895240.998, "dur": 0.771, + "args": { + "External id": 298005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895245.675, "dur": 3.800, + "args": { + "External id": 298006,"Record function id": 0, "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895246.761, "dur": 2.270, + "args": { + "External id": 298007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895247.328, "dur": 1.283, + "args": { + "External id": 298008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895247.782, "dur": 0.754, + "args": { + "External id": 298009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895252.472, "dur": 3.772, + "args": { + "External id": 298010,"Record function id": 0, "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368895253.381, "dur": 2.464, + "args": { + "External id": 298011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895254.042, "dur": 1.380, + "args": { + "External id": 298012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368895254.537, "dur": 0.812, + "args": { + "External id": 298013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368895260.015, "dur": 37309.943, + "args": { + "External id": 298014,"Record function id": 0, "Sequence number": 1209163, "Fwd thread id": 1, "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368895261.205, "dur": 37300.173, + "args": { + "External id": 298015,"Sequence number": 1209163, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4638 + } + }, + { + "ph": "f", "id": 69, "pid": 2070547, "tid": 2107622, "ts": 5333368895261.205, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2070547, "tid": 2107622, + "ts": 5333368895293.345, "dur": 35.225, + "args": { + "External id": 298016,"Record function id": 0, "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2070547, "tid": 2107622, + "ts": 5333368895336.014, "dur": 66.883, + "args": { + "External id": 298017,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2070547, "tid": 2107622, + "ts": 5333368895408.618, "dur": 37145.074, + "args": { + "External id": 298018,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368895500.080, "dur": 6.761, + "args": { + "External id": 298019,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368895518.784, "dur": 4.596, + "args": { + "External id": 298020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368895536.927, "dur": 36273.024, + "args": { + "External id": 298021,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368895549.561, "dur": 36251.762, + "args": { + "External id": 298022,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368895597.886, "dur": 13.818, + "args": { + "External id": 298023,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368895617.535, "dur": 36145.987, + "args": { + "External id": 298024,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368895657.010, "dur": 36105.816, + "args": { + "External id": 298025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368895661.452, "dur": 6.575, + "args": { + "External id": 298026,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368895669.881, "dur": 36089.735, + "args": { + "External id": 298027,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368931894.872, "dur": 8.836, + "args": { + "External id": 298028,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368931897.515, "dur": 5.793, + "args": { + "External id": 298029,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368931930.221, "dur": 349.900, + "args": { + "External id": 298030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368931952.256, "dur": 322.568, + "args": { + "External id": 298031,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4654, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368931962.201, "dur": 307.411, + "args": { + "External id": 298032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368932298.376, "dur": 2.128, + "args": { + "External id": 298033,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4656, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932357.913, "dur": 6.362, + "args": { + "External id": 298034,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932406.316, "dur": 1.420, + "args": { + "External id": 298035,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932422.145, "dur": 1.434, + "args": { + "External id": 298036,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932434.601, "dur": 1.004, + "args": { + "External id": 298037,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932446.259, "dur": 0.894, + "args": { + "External id": 298038,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932456.380, "dur": 1.057, + "args": { + "External id": 298039,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932467.476, "dur": 0.794, + "args": { + "External id": 298040,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932479.567, "dur": 1.614, + "args": { + "External id": 298041,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932492.086, "dur": 1.331, + "args": { + "External id": 298042,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368932583.738, "dur": 2075.234, + "args": { + "External id": 298043,"Record function id": 0, "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2070547, "tid": 2107622, + "ts": 5333368932603.299, "dur": 453.354, + "args": { + "External id": 298044,"Record function id": 0, "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070547, "tid": 2107622, + "ts": 5333368932618.542, "dur": 345.386, + "args": { + "External id": 298045,"Record function id": 0, "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932739.834, "dur": 6.000, + "args": { + "External id": 298046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932749.192, "dur": 1.277, + "args": { + "External id": 298047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932752.283, "dur": 1.933, + "args": { + "External id": 298048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932758.973, "dur": 1.596, + "args": { + "External id": 298049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932761.934, "dur": 1.235, + "args": { + "External id": 298050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932764.315, "dur": 1.248, + "args": { + "External id": 298051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932766.950, "dur": 1.378, + "args": { + "External id": 298052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932769.639, "dur": 1.353, + "args": { + "External id": 298053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932772.239, "dur": 1.430, + "args": { + "External id": 298054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368932775.244, "dur": 1.598, + "args": { + "External id": 298055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368932794.788, "dur": 140.862, + "args": { + "External id": 298056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368932810.771, "dur": 120.831, + "args": { + "External id": 298057,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368932823.540, "dur": 12.918, + "args": { + "External id": 298058,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368932840.301, "dur": 64.396, + "args": { + "External id": 298059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368932842.801, "dur": 61.478, + "args": { + "External id": 298060,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368932846.147, "dur": 5.996, + "args": { + "External id": 298061,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368932853.755, "dur": 49.960, + "args": { + "External id": 298062,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368933064.453, "dur": 1536.017, + "args": { + "External id": 298063,"Sequence number": 1209162, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4686 + } + }, + { + "ph": "f", "id": 70, "pid": 2070547, "tid": 2107622, "ts": 5333368933064.453, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933182.504, "dur": 104.208, + "args": { + "External id": 298064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368933326.834, "dur": 37.893, + "args": { + "External id": 298065,"kernel_hash": "c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/5t/c5tnhs5xq5qc7yr3l7cgs4vzxtk3cliycxpzqbsxievrlofzpfhr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933381.251, "dur": 48.387, + "args": { + "External id": 298066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933438.875, "dur": 31.499, + "args": { + "External id": 298067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933475.885, "dur": 45.558, + "args": { + "External id": 298068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933527.462, "dur": 27.345, + "args": { + "External id": 298069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933562.539, "dur": 42.084, + "args": { + "External id": 298070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368933666.117, "dur": 27.898, + "args": { + "External id": 298071,"kernel_hash": "cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/mm/cmmtfvdkgvqhsz243szg36eql3ea4ycfjtkqpfgjhi44lbeji46k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368933714.023, "dur": 28.249, + "args": { + "External id": 298072,"kernel_hash": "c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4b/c4bvpuxlruf4ls6zaebh4ulq5kwzna7xpak4xntuufnkxt6y4wc2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368933763.739, "dur": 19.638, + "args": { + "External id": 298073,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368933796.411, "dur": 16.057, + "args": { + "External id": 298074,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933821.248, "dur": 32.441, + "args": { + "External id": 298075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368933856.800, "dur": 33.116, + "args": { + "External id": 298076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070547, "tid": 2107622, + "ts": 5333368933916.685, "dur": 166.443, + "args": { + "External id": 298077,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368933990.709, "dur": 6.301, + "args": { + "External id": 298078,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368933998.863, "dur": 2.896, + "args": { + "External id": 298079,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368934115.850, "dur": 25.103, + "args": { + "External id": 298080,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368934151.388, "dur": 33.023, + "args": { + "External id": 298081,"kernel_hash": "cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/d3/cd3ye4lv3yv2y7l5dde5joralcg7ll6hlrrxvigczijvlwrp2l6t.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368934195.837, "dur": 39.648, + "args": { + "External id": 298082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368934241.516, "dur": 35.430, + "args": { + "External id": 298083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368934283.647, "dur": 21.144, + "args": { + "External id": 298084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368934309.838, "dur": 29.058, + "args": { + "External id": 298085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368934345.044, "dur": 20.663, + "args": { + "External id": 298086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2107622, + "ts": 5333368934372.633, "dur": 28.530, + "args": { + "External id": 298087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070547, "tid": 2107622, + "ts": 5333368934418.536, "dur": 21.913, + "args": { + "External id": 298088,"kernel_hash": "cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/as/cas6dq7e5rikp56ssrhbxwcqp4ttdyuwqorivviwplq7rgsqykgf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368934457.398, "dur": 23.776, + "args": { + "External id": 298089,"kernel_hash": "cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ny/cnyczubjxqoksn4kug4drlj6rdh2oloxs37rovzz3o7ct4qcv4od.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368934496.092, "dur": 17.086, + "args": { + "External id": 298090,"kernel_hash": "c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kpn4mfmx5wt5gmjhm7dpm4gulqcmz5mcm7dlheptufg56to5av.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070547, "tid": 2107622, + "ts": 5333368934526.706, "dur": 15.003, + "args": { + "External id": 298091,"kernel_hash": "cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/yf/cyfeqwmtr3blvks63uqsw3zds2osxyp6k5wrjhmcluio7cothlaw.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070547, "tid": 2107622, + "ts": 5333368934554.328, "dur": 17.714, + "args": { + "External id": 298092,"kernel_hash": "c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/25/c25b4nhnmm2ykwmscwd3eqlx5bkfkwuzdtym5ifpvjo2smy2yhw6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934681.735, "dur": 15.734, + "args": { + "External id": 298093,"Record function id": 0, "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934685.192, "dur": 11.160, + "args": { + "External id": 298094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934689.271, "dur": 5.935, + "args": { + "External id": 298095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934690.457, "dur": 4.665, + "args": { + "External id": 298096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934701.391, "dur": 5.107, + "args": { + "External id": 298097,"Record function id": 0, "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934702.785, "dur": 3.046, + "args": { + "External id": 298098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934703.862, "dur": 1.410, + "args": { + "External id": 298099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934704.197, "dur": 0.995, + "args": { + "External id": 298100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934709.749, "dur": 4.173, + "args": { + "External id": 298101,"Record function id": 0, "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934711.115, "dur": 2.357, + "args": { + "External id": 298102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934711.581, "dur": 1.410, + "args": { + "External id": 298103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934712.039, "dur": 0.834, + "args": { + "External id": 298104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934717.043, "dur": 4.026, + "args": { + "External id": 298105,"Record function id": 0, "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934718.528, "dur": 2.136, + "args": { + "External id": 298106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934719.137, "dur": 1.095, + "args": { + "External id": 298107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934719.417, "dur": 0.729, + "args": { + "External id": 298108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934724.155, "dur": 4.392, + "args": { + "External id": 298109,"Record function id": 0, "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934725.567, "dur": 2.563, + "args": { + "External id": 298110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934726.441, "dur": 1.210, + "args": { + "External id": 298111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934726.720, "dur": 0.856, + "args": { + "External id": 298112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934731.692, "dur": 3.972, + "args": { + "External id": 298113,"Record function id": 0, "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934733.076, "dur": 2.175, + "args": { + "External id": 298114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934733.536, "dur": 1.316, + "args": { + "External id": 298115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934734.057, "dur": 0.719, + "args": { + "External id": 298116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934738.944, "dur": 4.558, + "args": { + "External id": 298117,"Record function id": 0, "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934740.316, "dur": 2.573, + "args": { + "External id": 298118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934740.787, "dur": 1.658, + "args": { + "External id": 298119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934741.399, "dur": 0.972, + "args": { + "External id": 298120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934746.786, "dur": 4.483, + "args": { + "External id": 298121,"Record function id": 0, "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934748.259, "dur": 2.591, + "args": { + "External id": 298122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934748.710, "dur": 1.725, + "args": { + "External id": 298123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934749.110, "dur": 1.256, + "args": { + "External id": 298124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934754.796, "dur": 3.818, + "args": { + "External id": 298125,"Record function id": 0, "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368934755.992, "dur": 2.206, + "args": { + "External id": 298126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934756.465, "dur": 1.318, + "args": { + "External id": 298127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368934756.808, "dur": 0.904, + "args": { + "External id": 298128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368934762.437, "dur": 37265.218, + "args": { + "External id": 298129,"Record function id": 0, "Sequence number": 1209161, "Fwd thread id": 1, "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368934763.983, "dur": 37255.478, + "args": { + "External id": 298130,"Sequence number": 1209161, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4753 + } + }, + { + "ph": "f", "id": 71, "pid": 2070547, "tid": 2107622, "ts": 5333368934763.983, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2070547, "tid": 2107622, + "ts": 5333368934794.286, "dur": 37.679, + "args": { + "External id": 298131,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2070547, "tid": 2107622, + "ts": 5333368934839.303, "dur": 76.296, + "args": { + "External id": 298132,"Record function id": 0, "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2070547, "tid": 2107622, + "ts": 5333368934921.237, "dur": 37090.566, + "args": { + "External id": 298133,"Record function id": 0, "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368935007.965, "dur": 6.438, + "args": { + "External id": 298134,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368935038.212, "dur": 5.411, + "args": { + "External id": 298135,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368935060.524, "dur": 36149.968, + "args": { + "External id": 298136,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368935074.137, "dur": 36128.047, + "args": { + "External id": 298137,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368935124.238, "dur": 14.551, + "args": { + "External id": 298138,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368935144.683, "dur": 36008.596, + "args": { + "External id": 298139,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368935150.169, "dur": 36002.369, + "args": { + "External id": 298140,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368935154.040, "dur": 5.498, + "args": { + "External id": 298141,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368935161.033, "dur": 35988.554, + "args": { + "External id": 298142,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368971294.808, "dur": 8.720, + "args": { + "External id": 298143,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368971297.331, "dur": 5.821, + "args": { + "External id": 298144,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368971330.024, "dur": 401.922, + "args": { + "External id": 298145,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368971354.803, "dur": 372.091, + "args": { + "External id": 298146,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4769, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368971365.618, "dur": 355.546, + "args": { + "External id": 298147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368971753.413, "dur": 1.999, + "args": { + "External id": 298148,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4771, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971815.466, "dur": 6.488, + "args": { + "External id": 298149,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971863.546, "dur": 1.551, + "args": { + "External id": 298150,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971880.073, "dur": 1.599, + "args": { + "External id": 298151,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971893.582, "dur": 1.226, + "args": { + "External id": 298152,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971905.668, "dur": 0.975, + "args": { + "External id": 298153,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971915.580, "dur": 1.240, + "args": { + "External id": 298154,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971926.782, "dur": 1.370, + "args": { + "External id": 298155,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971938.290, "dur": 1.545, + "args": { + "External id": 298156,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368971950.797, "dur": 1.137, + "args": { + "External id": 298157,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368972041.525, "dur": 283.958, + "args": { + "External id": 298158,"Record function id": 0, "Sequence number": 1209160, "Fwd thread id": 1, "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070547, "tid": 2107622, + "ts": 5333368972044.019, "dur": 273.230, + "args": { + "External id": 298159,"Sequence number": 1209160, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4782 + } + }, + { + "ph": "f", "id": 72, "pid": 2070547, "tid": 2107622, "ts": 5333368972044.019, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2070547, "tid": 2107622, + "ts": 5333368972159.447, "dur": 60.269, + "args": { + "External id": 298160,"kernel_hash": "cfr73mqqwqpdrss7r6csecalgrfowe5r3r24yaeyivzpezfoy4fz", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/fr/cfr73mqqwqpdrss7r6csecalgrfowe5r3r24yaeyivzpezfoy4fz.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2070547, "tid": 2107622, + "ts": 5333368972235.968, "dur": 27.569, + "args": { + "External id": 298161,"kernel_hash": "cxedqnbrx2bvln4bf3pphdmthxjm6nsxqowbdddlxye4bt3yfnsg", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/xe/cxedqnbrx2bvln4bf3pphdmthxjm6nsxqowbdddlxye4bt3yfnsg.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096], [16, 4096, 2048], [32000, 2048], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2070547, "tid": 2107622, + "ts": 5333368972280.622, "dur": 21.522, + "args": { + "External id": 298162,"kernel_hash": "c2xxuncnwkfhjvqk7hfnejnsljjkm6qo4ajwnumdr7vdpf4nk2iq", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/2x/c2xxuncnwkfhjvqk7hfnejnsljjkm6qo4ajwnumdr7vdpf4nk2iq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368972336.582, "dur": 13.352, + "args": { + "External id": 298163,"Record function id": 0, "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070547, "tid": 2107622, + "ts": 5333368972339.660, "dur": 9.307, + "args": { + "External id": 298164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368972343.168, "dur": 5.027, + "args": { + "External id": 298165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2107622, + "ts": 5333368972344.337, "dur": 3.775, + "args": { + "External id": 298166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2070547, "tid": 2107622, + "ts": 5333368972370.581, "dur": 11402.408, + "args": { + "External id": 298167,"Record function id": 0, "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2070547, "tid": 2107622, + "ts": 5333368972387.073, "dur": 20.420, + "args": { + "External id": 298168,"Record function id": 0, "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2070547, "tid": 2107622, + "ts": 5333368972413.010, "dur": 57.222, + "args": { + "External id": 298169,"Record function id": 0, "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2070547, "tid": 2107622, + "ts": 5333368972475.299, "dur": 10963.999, + "args": { + "External id": 298170,"Record function id": 0, "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368972541.470, "dur": 6.569, + "args": { + "External id": 298171,"Record function id": 0, "Concrete Inputs": ["[196610048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2107622, + "ts": 5333368972556.957, "dur": 4.846, + "args": { + "External id": 298172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368972575.414, "dur": 10035.503, + "args": { + "External id": 298173,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070547, "tid": 2107622, + "ts": 5333368972588.025, "dur": 10010.636, + "args": { + "External id": 298174,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368972912.058, "dur": 17.216, + "args": { + "External id": 298175,"Record function id": 0, "Concrete Inputs": ["[48027]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2107622, + "ts": 5333368972980.772, "dur": 9576.525, + "args": { + "External id": 298176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], [], []], "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2107622, + "ts": 5333368972983.422, "dur": 9573.171, + "args": { + "External id": 298177,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], []], "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368972986.914, "dur": 11.554, + "args": { + "External id": 298178,"Record function id": 0, "Concrete Inputs": ["[48027]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2107622, + "ts": 5333368973000.109, "dur": 9552.282, + "args": { + "External id": 298179,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[48027], [48027], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368982733.214, "dur": 10.975, + "args": { + "External id": 298180,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[196610048], [], [], [], [], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2107622, + "ts": 5333368982736.308, "dur": 7.299, + "args": { + "External id": 298181,"Record function id": 0, "Concrete Inputs": ["[24576256]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070547, "tid": 2107622, + "ts": 5333368982772.291, "dur": 437.651, + "args": { + "External id": 298182,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[24576256], [196610048], [], [], [], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368982798.060, "dur": 406.020, + "args": { + "External id": 298183,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 24576256, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[196610048], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4806, "In msg nelems": 196610048 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070547, "tid": 2107622, + "ts": 5333368982808.712, "dur": 389.473, + "args": { + "External id": 298184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[196610048]], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2107622, + "ts": 5333368983231.199, "dur": 2.364, + "args": { + "External id": 298185,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4808, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368983295.162, "dur": 6.425, + "args": { + "External id": 298186,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368983345.457, "dur": 1.401, + "args": { + "External id": 298187,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368983361.433, "dur": 2.137, + "args": { + "External id": 298188,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "8192256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2107622, + "ts": 5333368983374.902, "dur": 1.713, + "args": { + "External id": 298189,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "16384256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#14847", "pid": 2070547, "tid": 2070547, + "ts": 5333366952815.870, "dur": 2049236.834, + "args": { + "External id": 289281,"Record function id": 0, "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2070547, "tid": 2070547, + "ts": 5333366952848.369, "dur": 553.308, + "args": { + "External id": 289282,"Record function id": 0, "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2070547, "tid": 2070547, + "ts": 5333366953440.469, "dur": 2095.007, + "args": { + "External id": 289283,"Record function id": 0, "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366954464.275, "dur": 8.695, + "args": { + "External id": 289284,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070547, "tid": 2070547, + "ts": 5333366954494.299, "dur": 6.369, + "args": { + "External id": 289285,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366954994.247, "dur": 2.336, + "args": { + "External id": 289286,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070547, "tid": 2070547, + "ts": 5333366955005.654, "dur": 2.530, + "args": { + "External id": 289287,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366955416.339, "dur": 1.693, + "args": { + "External id": 289288,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070547, "tid": 2070547, + "ts": 5333366955422.241, "dur": 2.091, + "args": { + "External id": 289289,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333366956261.216, "dur": 15.999, + "args": { + "External id": 289290,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366956270.176, "dur": 2.790, + "args": { + "External id": 289291,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333366956278.762, "dur": 4.431, + "args": { + "External id": 289292,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366956280.868, "dur": 1.163, + "args": { + "External id": 289293,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366956309.909, "dur": 557.061, + "args": { + "External id": 289294,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366956317.992, "dur": 548.099, + "args": { + "External id": 289295,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366956328.263, "dur": 9.542, + "args": { + "External id": 289296,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366956339.978, "dur": 524.045, + "args": { + "External id": 289297,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366956350.188, "dur": 0.743, + "args": { + "External id": 289298,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070547, "tid": 2070547, + "ts": 5333366956354.043, "dur": 5.756, + "args": { + "External id": 289299,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[16, 4096], [16, 4096]], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070547, "tid": 2070547, + "ts": 5333366956355.854, "dur": 3.781, + "args": { + "External id": 289300,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[16, 4096], [], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366956358.685, "dur": 0.679, + "args": { + "External id": 289301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070547, "tid": 2070547, + "ts": 5333366956362.432, "dur": 207.141, + "args": { + "External id": 289302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070547, "tid": 2070547, + "ts": 5333366956365.102, "dur": 204.068, + "args": { + "External id": 289303,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333366956367.440, "dur": 17.172, + "args": { + "External id": 289304,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366956371.761, "dur": 12.360, + "args": { + "External id": 289305,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366956385.460, "dur": 183.346, + "args": { + "External id": 289306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366956571.511, "dur": 287.627, + "args": { + "External id": 289307,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366956885.152, "dur": 557.963, + "args": { + "External id": 289308,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366956887.417, "dur": 554.579, + "args": { + "External id": 289309,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366956895.569, "dur": 9.645, + "args": { + "External id": 289310,"Record function id": 0, "Concrete Inputs": ["[16, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366956909.146, "dur": 528.275, + "args": { + "External id": 289311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[16, 8192], [16, 8192], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070547, "tid": 2070547, + "ts": 5333366957475.679, "dur": 54.569, + "args": { + "External id": 289312,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366957481.861, "dur": 6.298, + "args": { + "External id": 289313,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070547, "tid": 2070547, + "ts": 5333366957491.205, "dur": 38.684, + "args": { + "External id": 289314,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333366957495.665, "dur": 6.590, + "args": { + "External id": 289315,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2070547, "tid": 2070547, + "ts": 5333366957541.110, "dur": 78.193, + "args": { + "External id": 289316,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070547, "tid": 2070547, + "ts": 5333366957548.460, "dur": 7.427, + "args": { + "External id": 289317,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366957553.777, "dur": 1.812, + "args": { + "External id": 289318,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 4850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366957557.094, "dur": 4.718, + "args": { + "External id": 289319,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2070547, "tid": 2070547, + "ts": 5333366957564.811, "dur": 4.162, + "args": { + "External id": 289320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[16, 4096]], "Ev Idx": 4852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070547, "tid": 2070547, + "ts": 5333366957571.858, "dur": 5.412, + "args": { + "External id": 289321,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366957576.259, "dur": 0.803, + "args": { + "External id": 289322,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070547, "tid": 2070547, + "ts": 5333366957578.370, "dur": 4.741, + "args": { + "External id": 289323,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366957582.297, "dur": 0.726, + "args": { + "External id": 289324,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070547, "tid": 2070547, + "ts": 5333366957585.023, "dur": 6.298, + "args": { + "External id": 289325,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [16, 1, 1, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070547, "tid": 2070547, + "ts": 5333366957588.352, "dur": 2.874, + "args": { + "External id": 289326,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366957590.322, "dur": 0.699, + "args": { + "External id": 289327,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366957592.429, "dur": 26.176, + "args": { + "External id": 289328,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[16, 1, 1, 4096], [16, 1, 1, 4096], []], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366957666.547, "dur": 32.041, + "args": { + "External id": 289329,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366957668.084, "dur": 30.323, + "args": { + "External id": 289330,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366957673.467, "dur": 4.546, + "args": { + "External id": 289331,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366957678.909, "dur": 18.964, + "args": { + "External id": 289332,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333366957816.035, "dur": 150.160, + "args": { + "External id": 289333,"Record function id": 0, "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2070547, "tid": 2070547, + "ts": 5333366957896.506, "dur": 58.704, + "args": { + "External id": 289334,"Record function id": 0, "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333366957974.081, "dur": 43.083, + "args": { + "External id": 289335,"Record function id": 0, "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333366958025.122, "dur": 7382.042, + "args": { + "External id": 289336,"Record function id": 0, "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2070547, "tid": 2070547, + "ts": 5333366958034.293, "dur": 872.986, + "args": { + "External id": 289337,"Record function id": 0, "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366958095.667, "dur": 6.340, + "args": { + "External id": 289338,"Record function id": 0, "Concrete Inputs": ["[24576256]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366958115.875, "dur": 19.970, + "args": { + "External id": 289339,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958119.767, "dur": 1.486, + "args": { + "External id": 289340,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958127.349, "dur": 0.268, + "args": { + "External id": 289341,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958128.554, "dur": 0.372, + "args": { + "External id": 289342,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "8192256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958129.828, "dur": 1.750, + "args": { + "External id": 289343,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "16384256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366958149.591, "dur": 68.259, + "args": { + "External id": 289344,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333366958250.793, "dur": 110.286, + "args": { + "External id": 289345,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "24576256", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [], [], [], [], [], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366958263.604, "dur": 4.404, + "args": { + "External id": 289346,"Record function id": 0, "Concrete Inputs": ["[196610048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333366958273.818, "dur": 12.359, + "args": { + "External id": 289347,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "24576256"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333366958278.544, "dur": 7.001, + "args": { + "External id": 289348,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "24576256", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[196610048], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958283.394, "dur": 0.586, + "args": { + "External id": 289349,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366958292.607, "dur": 15.902, + "args": { + "External id": 289350,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958294.377, "dur": 0.457, + "args": { + "External id": 289351,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958300.335, "dur": 0.190, + "args": { + "External id": 289352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958301.550, "dur": 0.356, + "args": { + "External id": 289353,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "8192256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366958302.756, "dur": 0.361, + "args": { + "External id": 289354,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "16384256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366958318.976, "dur": 34.063, + "args": { + "External id": 289355,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333366958408.575, "dur": 393.131, + "args": { + "External id": 289356,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[196610048], [24576256], [], [], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366958441.820, "dur": 353.923, + "args": { + "External id": 289357,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 196610048, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[24576256], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4889, "In msg nelems": 24576256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333366958453.643, "dur": 335.684, + "args": { + "External id": 289358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[24576256]], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366958830.299, "dur": 2.590, + "args": { + "External id": 289359,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4891, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2070547, "tid": 2070547, + "ts": 5333366958923.245, "dur": 6383.418, + "args": { + "External id": 289360,"Record function id": 0, "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366959001.819, "dur": 6.473, + "args": { + "External id": 289361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366959011.468, "dur": 2.570, + "args": { + "External id": 289362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366959015.743, "dur": 1.077, + "args": { + "External id": 289363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366959018.862, "dur": 1.312, + "args": { + "External id": 289364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366959021.590, "dur": 0.984, + "args": { + "External id": 289365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366959040.790, "dur": 6219.728, + "args": { + "External id": 289366,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366959058.060, "dur": 6194.594, + "args": { + "External id": 289367,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366959077.142, "dur": 5.652, + "args": { + "External id": 289368,"Record function id": 0, "Concrete Inputs": ["[3447]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366959089.359, "dur": 6126.681, + "args": { + "External id": 289369,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], [], []], "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366959091.840, "dur": 6123.463, + "args": { + "External id": 289370,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], []], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366959097.517, "dur": 6.055, + "args": { + "External id": 289371,"Record function id": 0, "Concrete Inputs": ["[3447]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366959105.092, "dur": 6106.647, + "args": { + "External id": 289372,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3447], [3447], []], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333366965484.522, "dur": 37.046, + "args": { + "External id": 289373,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2070547, "tid": 2070547, + "ts": 5333366965522.689, "dur": 245.982, + "args": { + "External id": 289374,"Record function id": 0, "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333366965564.018, "dur": 194.993, + "args": { + "External id": 289375,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[2048, 1], [4096, 1]], "Input Dims": [[32000, 2048], [16, 4096]], "Ev Idx": 4907 + } + }, + { + "ph": "s", "id": 72, "pid": 2070547, "tid": 2070547, "ts": 5333366965564.018, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2070547, "tid": 2070547, + "ts": 5333366965671.215, "dur": 50.656, + "args": { + "External id": 289376,"kernel_hash": "cepuvqida76mp3inhfyv5xcp3dgr5kc6gyyaupvhufqptth43nyf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ep/cepuvqida76mp3inhfyv5xcp3dgr5kc6gyyaupvhufqptth43nyf.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096], [32000, 2048], [16, 4096, 2048], []], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333366965830.786, "dur": 51.816, + "args": { + "External id": 289377,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2070547, "tid": 2070547, + "ts": 5333366965892.345, "dur": 7363.022, + "args": { + "External id": 289378,"Record function id": 0, "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070547, "tid": 2070547, + "ts": 5333366965900.316, "dur": 824.363, + "args": { + "External id": 289379,"Record function id": 0, "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366965967.355, "dur": 11.064, + "args": { + "External id": 289380,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366965990.195, "dur": 45.251, + "args": { + "External id": 289381,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366965999.959, "dur": 4.129, + "args": { + "External id": 289382,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966005.690, "dur": 0.598, + "args": { + "External id": 289383,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966007.158, "dur": 2.368, + "args": { + "External id": 289384,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966012.447, "dur": 0.379, + "args": { + "External id": 289385,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966013.502, "dur": 0.383, + "args": { + "External id": 289386,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966018.980, "dur": 0.194, + "args": { + "External id": 289387,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966019.935, "dur": 0.300, + "args": { + "External id": 289388,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966021.034, "dur": 0.175, + "args": { + "External id": 289389,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966026.199, "dur": 1.848, + "args": { + "External id": 289390,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366966046.453, "dur": 33.078, + "args": { + "External id": 289391,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333366966111.729, "dur": 132.832, + "args": { + "External id": 289392,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366966122.167, "dur": 3.715, + "args": { + "External id": 289393,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333366966130.798, "dur": 11.531, + "args": { + "External id": 289394,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333366966135.162, "dur": 6.740, + "args": { + "External id": 289395,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966139.913, "dur": 0.736, + "args": { + "External id": 289396,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366966148.799, "dur": 47.332, + "args": { + "External id": 289397,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966149.970, "dur": 0.661, + "args": { + "External id": 289398,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966154.415, "dur": 0.291, + "args": { + "External id": 289399,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966158.325, "dur": 0.574, + "args": { + "External id": 289400,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966160.932, "dur": 0.459, + "args": { + "External id": 289401,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966162.100, "dur": 2.664, + "args": { + "External id": 289402,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966165.686, "dur": 16.642, + "args": { + "External id": 289403,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966185.900, "dur": 0.402, + "args": { + "External id": 289404,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966189.410, "dur": 0.286, + "args": { + "External id": 289405,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966190.391, "dur": 0.338, + "args": { + "External id": 289406,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366966209.617, "dur": 25.571, + "args": { + "External id": 289407,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333366966296.194, "dur": 293.514, + "args": { + "External id": 289408,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366966323.967, "dur": 260.987, + "args": { + "External id": 289409,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4941, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333366966333.825, "dur": 246.209, + "args": { + "External id": 289410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366966611.436, "dur": 2.628, + "args": { + "External id": 289411,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4943, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070547, "tid": 2070547, + "ts": 5333366966746.613, "dur": 6184.934, + "args": { + "External id": 289412,"Record function id": 0, "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966843.860, "dur": 6.326, + "args": { + "External id": 289413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966853.717, "dur": 1.021, + "args": { + "External id": 289414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966856.258, "dur": 1.272, + "args": { + "External id": 289415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966859.372, "dur": 0.608, + "args": { + "External id": 289416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966861.446, "dur": 2.510, + "args": { + "External id": 289417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966865.126, "dur": 0.914, + "args": { + "External id": 289418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966867.471, "dur": 1.014, + "args": { + "External id": 289419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966873.975, "dur": 2.959, + "args": { + "External id": 289420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966878.268, "dur": 1.008, + "args": { + "External id": 289421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366966880.964, "dur": 0.654, + "args": { + "External id": 289422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366966898.356, "dur": 5964.977, + "args": { + "External id": 289423,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366966915.455, "dur": 5935.373, + "args": { + "External id": 289424,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366966939.922, "dur": 15.619, + "args": { + "External id": 289425,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366966959.651, "dur": 5839.556, + "args": { + "External id": 289426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366966962.227, "dur": 5836.039, + "args": { + "External id": 289427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366966967.992, "dur": 6.365, + "args": { + "External id": 289428,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366966976.111, "dur": 5817.305, + "args": { + "External id": 289429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333366973128.817, "dur": 82.512, + "args": { + "External id": 289430,"Sequence number": 1209161, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4962 + } + }, + { + "ph": "s", "id": 71, "pid": 2070547, "tid": 2070547, "ts": 5333366973128.817, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333366973150.965, "dur": 52.772, + "args": { + "External id": 289431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366973159.993, "dur": 43.013, + "args": { + "External id": 289432,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333366973312.734, "dur": 117.734, + "args": { + "External id": 289433,"Record function id": 0, "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333366973433.107, "dur": 2004.836, + "args": { + "External id": 289434,"Record function id": 0, "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333366973487.801, "dur": 1929.873, + "args": { + "External id": 289435,"Sequence number": 1209162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 4967 + } + }, + { + "ph": "s", "id": 70, "pid": 2070547, "tid": 2070547, "ts": 5333366973487.801, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333366973581.375, "dur": 131.850, + "args": { + "External id": 289436,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366973736.490, "dur": 124.436, + "args": { + "External id": 289437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366973876.660, "dur": 68.098, + "args": { + "External id": 289438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366973959.543, "dur": 51.692, + "args": { + "External id": 289439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333366974056.079, "dur": 44.664, + "args": { + "External id": 289440,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333366974131.188, "dur": 26.944, + "args": { + "External id": 289441,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333366974298.948, "dur": 294.575, + "args": { + "External id": 289442,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333366974471.259, "dur": 19.504, + "args": { + "External id": 289443,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366974478.127, "dur": 11.253, + "args": { + "External id": 289444,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366974494.182, "dur": 4.910, + "args": { + "External id": 289445,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366974502.499, "dur": 1.381, + "args": { + "External id": 289446,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366974507.042, "dur": 4.455, + "args": { + "External id": 289447,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366974615.752, "dur": 177.225, + "args": { + "External id": 289448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333366974856.399, "dur": 51.484, + "args": { + "External id": 289449,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366974925.903, "dur": 80.144, + "args": { + "External id": 289450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366975019.195, "dur": 56.044, + "args": { + "External id": 289451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333366975109.372, "dur": 36.988, + "args": { + "External id": 289452,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366975155.619, "dur": 85.748, + "args": { + "External id": 289453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333366975281.967, "dur": 28.423, + "args": { + "External id": 289454,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2070547, "tid": 2070547, + "ts": 5333366975523.274, "dur": 169.456, + "args": { + "External id": 289455,"Record function id": 0, "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333366975797.024, "dur": 69.267, + "args": { + "External id": 289456,"Record function id": 0, "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2070547, "tid": 2070547, + "ts": 5333366975879.550, "dur": 16848.244, + "args": { + "External id": 289457,"Record function id": 0, "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070547, "tid": 2070547, + "ts": 5333366975893.792, "dur": 1215.892, + "args": { + "External id": 289458,"Record function id": 0, "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366975997.799, "dur": 13.108, + "args": { + "External id": 289459,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366976027.084, "dur": 54.985, + "args": { + "External id": 289460,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976035.226, "dur": 4.839, + "args": { + "External id": 289461,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976045.523, "dur": 0.864, + "args": { + "External id": 289462,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976049.403, "dur": 0.378, + "args": { + "External id": 289463,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976053.634, "dur": 0.399, + "args": { + "External id": 289464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976055.278, "dur": 0.369, + "args": { + "External id": 289465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976058.921, "dur": 3.949, + "args": { + "External id": 289466,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976064.404, "dur": 0.311, + "args": { + "External id": 289467,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976066.268, "dur": 0.483, + "args": { + "External id": 289468,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976072.528, "dur": 0.433, + "args": { + "External id": 289469,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366976096.728, "dur": 63.424, + "args": { + "External id": 289470,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333366976243.054, "dur": 173.144, + "args": { + "External id": 289471,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366976261.055, "dur": 8.638, + "args": { + "External id": 289472,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333366976277.179, "dur": 14.252, + "args": { + "External id": 289473,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333366976282.777, "dur": 8.133, + "args": { + "External id": 289474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976287.445, "dur": 1.106, + "args": { + "External id": 289475,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366976302.125, "dur": 40.832, + "args": { + "External id": 289476,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976306.657, "dur": 0.482, + "args": { + "External id": 289477,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976309.191, "dur": 0.695, + "args": { + "External id": 289478,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976314.057, "dur": 1.798, + "args": { + "External id": 289479,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976318.783, "dur": 0.497, + "args": { + "External id": 289480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976320.608, "dur": 0.655, + "args": { + "External id": 289481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976324.767, "dur": 0.337, + "args": { + "External id": 289482,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976328.321, "dur": 0.485, + "args": { + "External id": 289483,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976329.930, "dur": 2.809, + "args": { + "External id": 289484,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366976336.212, "dur": 0.335, + "args": { + "External id": 289485,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366976361.622, "dur": 42.050, + "args": { + "External id": 289486,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333366976489.750, "dur": 490.908, + "args": { + "External id": 289487,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366976562.144, "dur": 411.868, + "args": { + "External id": 289488,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5020, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333366976575.270, "dur": 390.890, + "args": { + "External id": 289489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366977013.064, "dur": 3.583, + "args": { + "External id": 289490,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5022, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070547, "tid": 2070547, + "ts": 5333366977137.798, "dur": 15329.431, + "args": { + "External id": 289491,"Record function id": 0, "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977301.926, "dur": 8.797, + "args": { + "External id": 289492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977316.807, "dur": 3.059, + "args": { + "External id": 289493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977323.526, "dur": 1.131, + "args": { + "External id": 289494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977327.793, "dur": 1.237, + "args": { + "External id": 289495,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977331.930, "dur": 1.322, + "args": { + "External id": 289496,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977337.986, "dur": 1.397, + "args": { + "External id": 289497,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977341.864, "dur": 1.446, + "args": { + "External id": 289498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977345.882, "dur": 3.000, + "args": { + "External id": 289499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977351.265, "dur": 1.032, + "args": { + "External id": 289500,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366977356.802, "dur": 2.544, + "args": { + "External id": 289501,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366977386.174, "dur": 15014.503, + "args": { + "External id": 289502,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366977407.571, "dur": 14979.695, + "args": { + "External id": 289503,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366977434.760, "dur": 18.822, + "args": { + "External id": 289504,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366977459.016, "dur": 14877.098, + "args": { + "External id": 289505,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366977462.492, "dur": 14872.536, + "args": { + "External id": 289506,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366977469.416, "dur": 6.756, + "args": { + "External id": 289507,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366977478.737, "dur": 14850.503, + "args": { + "External id": 289508,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333366992659.020, "dur": 39.212, + "args": { + "External id": 289509,"Sequence number": 1209163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5041 + } + }, + { + "ph": "s", "id": 69, "pid": 2070547, "tid": 2070547, "ts": 5333366992659.020, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333366992680.643, "dur": 12.301, + "args": { + "External id": 289510,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366992685.333, "dur": 7.189, + "args": { + "External id": 289511,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333366992773.301, "dur": 87.248, + "args": { + "External id": 289512,"Record function id": 0, "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333366992861.957, "dur": 1132.257, + "args": { + "External id": 289513,"Record function id": 0, "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333366992902.508, "dur": 1077.004, + "args": { + "External id": 289514,"Sequence number": 1209164, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5046 + } + }, + { + "ph": "s", "id": 68, "pid": 2070547, "tid": 2070547, "ts": 5333366992902.508, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333366992976.628, "dur": 51.445, + "args": { + "External id": 289515,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366993042.105, "dur": 105.261, + "args": { + "External id": 289516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366993157.237, "dur": 57.692, + "args": { + "External id": 289517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366993227.525, "dur": 33.303, + "args": { + "External id": 289518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333366993289.010, "dur": 31.286, + "args": { + "External id": 289519,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333366993338.673, "dur": 16.586, + "args": { + "External id": 289520,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333366993375.988, "dur": 137.519, + "args": { + "External id": 289521,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333366993429.504, "dur": 13.859, + "args": { + "External id": 289522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366993435.879, "dur": 6.547, + "args": { + "External id": 289523,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366993445.925, "dur": 4.957, + "args": { + "External id": 289524,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366993452.174, "dur": 1.285, + "args": { + "External id": 289525,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366993455.796, "dur": 3.910, + "args": { + "External id": 289526,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366993524.416, "dur": 46.096, + "args": { + "External id": 289527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333366993601.099, "dur": 69.017, + "args": { + "External id": 289528,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366993682.861, "dur": 46.223, + "args": { + "External id": 289529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366993737.190, "dur": 34.475, + "args": { + "External id": 289530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333366993797.697, "dur": 25.435, + "args": { + "External id": 289531,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333366993828.681, "dur": 32.938, + "args": { + "External id": 289532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333366993883.807, "dur": 21.341, + "args": { + "External id": 289533,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2070547, "tid": 2070547, + "ts": 5333366994059.807, "dur": 78.037, + "args": { + "External id": 289534,"Record function id": 0, "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333366994240.461, "dur": 49.400, + "args": { + "External id": 289535,"Record function id": 0, "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2070547, "tid": 2070547, + "ts": 5333366994299.913, "dur": 18763.139, + "args": { + "External id": 289536,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070547, "tid": 2070547, + "ts": 5333366994310.723, "dur": 848.837, + "args": { + "External id": 289537,"Record function id": 0, "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366994393.334, "dur": 10.006, + "args": { + "External id": 289538,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366994416.687, "dur": 40.817, + "args": { + "External id": 289539,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994422.039, "dur": 2.316, + "args": { + "External id": 289540,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994431.150, "dur": 0.387, + "args": { + "External id": 289541,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994432.155, "dur": 0.663, + "args": { + "External id": 289542,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994433.522, "dur": 0.660, + "args": { + "External id": 289543,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994439.162, "dur": 0.420, + "args": { + "External id": 289544,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994440.500, "dur": 0.655, + "args": { + "External id": 289545,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994443.414, "dur": 4.530, + "args": { + "External id": 289546,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994448.915, "dur": 0.375, + "args": { + "External id": 289547,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994450.110, "dur": 0.342, + "args": { + "External id": 289548,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366994470.610, "dur": 45.996, + "args": { + "External id": 289549,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333366994548.837, "dur": 162.507, + "args": { + "External id": 289550,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366994560.770, "dur": 4.455, + "args": { + "External id": 289551,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333366994570.182, "dur": 10.213, + "args": { + "External id": 289552,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333366994575.018, "dur": 4.943, + "args": { + "External id": 289553,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994578.049, "dur": 0.750, + "args": { + "External id": 289554,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333366994587.731, "dur": 70.243, + "args": { + "External id": 289555,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994590.181, "dur": 2.548, + "args": { + "External id": 289556,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994593.674, "dur": 0.626, + "args": { + "External id": 289557,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994596.571, "dur": 0.423, + "args": { + "External id": 289558,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994600.143, "dur": 1.213, + "args": { + "External id": 289559,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994602.153, "dur": 0.371, + "args": { + "External id": 289560,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994603.868, "dur": 0.344, + "args": { + "External id": 289561,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994607.682, "dur": 0.311, + "args": { + "External id": 289562,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994609.314, "dur": 0.386, + "args": { + "External id": 289563,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366994611.542, "dur": 2.853, + "args": { + "External id": 289564,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366994673.627, "dur": 29.043, + "args": { + "External id": 289565,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333366994766.323, "dur": 303.703, + "args": { + "External id": 289566,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366994801.072, "dur": 264.636, + "args": { + "External id": 289567,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5099, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333366994813.564, "dur": 246.458, + "args": { + "External id": 289568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333366995092.667, "dur": 2.579, + "args": { + "External id": 289569,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5101, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070547, "tid": 2070547, + "ts": 5333366995201.570, "dur": 17634.842, + "args": { + "External id": 289570,"Record function id": 0, "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995301.989, "dur": 6.850, + "args": { + "External id": 289571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995312.390, "dur": 0.973, + "args": { + "External id": 289572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995316.293, "dur": 1.837, + "args": { + "External id": 289573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995319.923, "dur": 0.989, + "args": { + "External id": 289574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995322.530, "dur": 1.250, + "args": { + "External id": 289575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995325.450, "dur": 1.206, + "args": { + "External id": 289576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995329.721, "dur": 1.484, + "args": { + "External id": 289577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995332.917, "dur": 2.395, + "args": { + "External id": 289578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995336.854, "dur": 0.878, + "args": { + "External id": 289579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333366995339.238, "dur": 0.917, + "args": { + "External id": 289580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366995358.697, "dur": 17403.938, + "args": { + "External id": 289581,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366995374.069, "dur": 17374.602, + "args": { + "External id": 289582,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333366995395.368, "dur": 15.781, + "args": { + "External id": 289583,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333366995415.059, "dur": 17282.366, + "args": { + "External id": 289584,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333366995417.626, "dur": 17278.612, + "args": { + "External id": 289585,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333366995424.378, "dur": 5.280, + "args": { + "External id": 289586,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333366995431.592, "dur": 17257.799, + "args": { + "External id": 289587,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367012999.857, "dur": 35.390, + "args": { + "External id": 289588,"Sequence number": 1209165, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5120 + } + }, + { + "ph": "s", "id": 67, "pid": 2070547, "tid": 2070547, "ts": 5333367012999.857, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367013017.595, "dur": 12.578, + "args": { + "External id": 289589,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367013023.020, "dur": 6.955, + "args": { + "External id": 289590,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367013109.024, "dur": 104.429, + "args": { + "External id": 289591,"Record function id": 0, "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367013216.569, "dur": 1157.513, + "args": { + "External id": 289592,"Record function id": 0, "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367013264.378, "dur": 1095.923, + "args": { + "External id": 289593,"Sequence number": 1209166, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5125 + } + }, + { + "ph": "s", "id": 66, "pid": 2070547, "tid": 2070547, "ts": 5333367013264.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367013338.248, "dur": 48.647, + "args": { + "External id": 289594,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367013401.822, "dur": 108.386, + "args": { + "External id": 289595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367013520.815, "dur": 38.395, + "args": { + "External id": 289596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367013569.622, "dur": 31.417, + "args": { + "External id": 289597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367013666.325, "dur": 33.213, + "args": { + "External id": 289598,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367013722.075, "dur": 17.263, + "args": { + "External id": 289599,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367013761.720, "dur": 138.741, + "args": { + "External id": 289600,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367013816.497, "dur": 12.311, + "args": { + "External id": 289601,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367013822.138, "dur": 5.896, + "args": { + "External id": 289602,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367013831.683, "dur": 6.123, + "args": { + "External id": 289603,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367013839.332, "dur": 1.096, + "args": { + "External id": 289604,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367013843.296, "dur": 4.071, + "args": { + "External id": 289605,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367013912.134, "dur": 53.343, + "args": { + "External id": 289606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367013997.336, "dur": 30.738, + "args": { + "External id": 289607,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367014037.409, "dur": 43.922, + "args": { + "External id": 289608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367014089.518, "dur": 34.517, + "args": { + "External id": 289609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367014148.461, "dur": 45.244, + "args": { + "External id": 289610,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367014202.138, "dur": 39.216, + "args": { + "External id": 289611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367014264.175, "dur": 22.359, + "args": { + "External id": 289612,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2070547, "tid": 2070547, + "ts": 5333367014442.552, "dur": 80.259, + "args": { + "External id": 289613,"Record function id": 0, "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367014600.579, "dur": 98.792, + "args": { + "External id": 289614,"Record function id": 0, "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2070547, "tid": 2070547, + "ts": 5333367014711.125, "dur": 18313.856, + "args": { + "External id": 289615,"Record function id": 0, "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070547, "tid": 2070547, + "ts": 5333367014719.175, "dur": 856.446, + "args": { + "External id": 289616,"Record function id": 0, "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367014799.783, "dur": 10.638, + "args": { + "External id": 289617,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367014824.216, "dur": 46.534, + "args": { + "External id": 289618,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014830.381, "dur": 2.435, + "args": { + "External id": 289619,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014837.903, "dur": 0.490, + "args": { + "External id": 289620,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014840.048, "dur": 0.563, + "args": { + "External id": 289621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014842.348, "dur": 0.545, + "args": { + "External id": 289622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014846.852, "dur": 0.564, + "args": { + "External id": 289623,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014848.670, "dur": 0.598, + "args": { + "External id": 289624,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014850.986, "dur": 4.596, + "args": { + "External id": 289625,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014856.951, "dur": 0.591, + "args": { + "External id": 289626,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014862.473, "dur": 0.607, + "args": { + "External id": 289627,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367014882.476, "dur": 48.683, + "args": { + "External id": 289628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367014966.085, "dur": 127.623, + "args": { + "External id": 289629,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367014977.888, "dur": 3.761, + "args": { + "External id": 289630,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367014986.491, "dur": 12.754, + "args": { + "External id": 289631,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367014991.229, "dur": 7.631, + "args": { + "External id": 289632,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367014994.604, "dur": 2.967, + "args": { + "External id": 289633,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367015009.210, "dur": 35.217, + "args": { + "External id": 289634,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015012.025, "dur": 0.649, + "args": { + "External id": 289635,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015015.234, "dur": 3.171, + "args": { + "External id": 289636,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015019.981, "dur": 0.570, + "args": { + "External id": 289637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015022.153, "dur": 1.541, + "args": { + "External id": 289638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015027.462, "dur": 0.205, + "args": { + "External id": 289639,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015029.135, "dur": 0.333, + "args": { + "External id": 289640,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015030.971, "dur": 0.423, + "args": { + "External id": 289641,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015035.544, "dur": 0.223, + "args": { + "External id": 289642,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015037.376, "dur": 0.408, + "args": { + "External id": 289643,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367015059.023, "dur": 26.691, + "args": { + "External id": 289644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367015146.608, "dur": 336.054, + "args": { + "External id": 289645,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367015206.335, "dur": 271.785, + "args": { + "External id": 289646,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5178, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367015218.480, "dur": 254.660, + "args": { + "External id": 289647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367015505.508, "dur": 2.275, + "args": { + "External id": 289648,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5180, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070547, "tid": 2070547, + "ts": 5333367015596.098, "dur": 17200.262, + "args": { + "External id": 289649,"Record function id": 0, "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015749.714, "dur": 6.849, + "args": { + "External id": 289650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015760.838, "dur": 0.915, + "args": { + "External id": 289651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015763.812, "dur": 2.169, + "args": { + "External id": 289652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015768.167, "dur": 0.836, + "args": { + "External id": 289653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015770.443, "dur": 1.511, + "args": { + "External id": 289654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015773.401, "dur": 1.101, + "args": { + "External id": 289655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015778.613, "dur": 0.967, + "args": { + "External id": 289656,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015781.222, "dur": 2.194, + "args": { + "External id": 289657,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015785.105, "dur": 1.055, + "args": { + "External id": 289658,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367015788.138, "dur": 0.907, + "args": { + "External id": 289659,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367015810.173, "dur": 16910.668, + "args": { + "External id": 289660,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367015826.713, "dur": 16880.693, + "args": { + "External id": 289661,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367015847.638, "dur": 15.144, + "args": { + "External id": 289662,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367015866.427, "dur": 16787.523, + "args": { + "External id": 289663,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367015869.117, "dur": 16783.204, + "args": { + "External id": 289664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367015875.868, "dur": 6.434, + "args": { + "External id": 289665,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367015883.988, "dur": 16732.619, + "args": { + "External id": 289666,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367032959.049, "dur": 36.795, + "args": { + "External id": 289667,"Sequence number": 1209167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5199 + } + }, + { + "ph": "s", "id": 65, "pid": 2070547, "tid": 2070547, "ts": 5333367032959.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367032978.501, "dur": 12.196, + "args": { + "External id": 289668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367032983.935, "dur": 6.532, + "args": { + "External id": 289669,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367033069.679, "dur": 84.477, + "args": { + "External id": 289670,"Record function id": 0, "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367033155.994, "dur": 1172.485, + "args": { + "External id": 289671,"Record function id": 0, "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367033221.099, "dur": 1092.795, + "args": { + "External id": 289672,"Sequence number": 1209168, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5204 + } + }, + { + "ph": "s", "id": 64, "pid": 2070547, "tid": 2070547, "ts": 5333367033221.099, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367033293.786, "dur": 51.384, + "args": { + "External id": 289673,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367033359.478, "dur": 111.246, + "args": { + "External id": 289674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367033480.408, "dur": 40.064, + "args": { + "External id": 289675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367033529.249, "dur": 31.115, + "args": { + "External id": 289676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367033587.208, "dur": 27.955, + "args": { + "External id": 289677,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367033675.342, "dur": 19.911, + "args": { + "External id": 289678,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367033717.583, "dur": 137.707, + "args": { + "External id": 289679,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367033771.176, "dur": 13.394, + "args": { + "External id": 289680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367033776.730, "dur": 7.003, + "args": { + "External id": 289681,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367033787.435, "dur": 5.093, + "args": { + "External id": 289682,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367033793.968, "dur": 1.151, + "args": { + "External id": 289683,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367033797.494, "dur": 5.784, + "args": { + "External id": 289684,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367033866.445, "dur": 54.653, + "args": { + "External id": 289685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367033949.961, "dur": 29.321, + "args": { + "External id": 289686,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367033990.075, "dur": 41.137, + "args": { + "External id": 289687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367034039.782, "dur": 36.023, + "args": { + "External id": 289688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367034098.341, "dur": 25.526, + "args": { + "External id": 289689,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367034129.534, "dur": 33.203, + "args": { + "External id": 289690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367034209.612, "dur": 25.075, + "args": { + "External id": 289691,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2070547, "tid": 2070547, + "ts": 5333367034394.130, "dur": 76.122, + "args": { + "External id": 289692,"Record function id": 0, "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367034547.469, "dur": 47.646, + "args": { + "External id": 289693,"Record function id": 0, "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2070547, "tid": 2070547, + "ts": 5333367034604.628, "dur": 18469.961, + "args": { + "External id": 289694,"Record function id": 0, "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070547, "tid": 2070547, + "ts": 5333367034611.685, "dur": 926.171, + "args": { + "External id": 289695,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367034744.022, "dur": 9.662, + "args": { + "External id": 289696,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367034767.567, "dur": 43.185, + "args": { + "External id": 289697,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034773.516, "dur": 2.420, + "args": { + "External id": 289698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034781.129, "dur": 0.550, + "args": { + "External id": 289699,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034783.459, "dur": 0.423, + "args": { + "External id": 289700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034785.887, "dur": 0.625, + "args": { + "External id": 289701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034789.873, "dur": 0.589, + "args": { + "External id": 289702,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034791.958, "dur": 0.577, + "args": { + "External id": 289703,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034794.335, "dur": 4.439, + "args": { + "External id": 289704,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034800.173, "dur": 0.342, + "args": { + "External id": 289705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034802.073, "dur": 0.554, + "args": { + "External id": 289706,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367034823.055, "dur": 46.215, + "args": { + "External id": 289707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367034903.712, "dur": 125.247, + "args": { + "External id": 289708,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367034915.501, "dur": 4.308, + "args": { + "External id": 289709,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367034924.940, "dur": 11.419, + "args": { + "External id": 289710,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367034929.820, "dur": 6.131, + "args": { + "External id": 289711,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034933.963, "dur": 0.689, + "args": { + "External id": 289712,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367034942.920, "dur": 41.290, + "args": { + "External id": 289713,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034945.659, "dur": 2.873, + "args": { + "External id": 289714,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034951.105, "dur": 0.653, + "args": { + "External id": 289715,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034953.400, "dur": 0.328, + "args": { + "External id": 289716,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034957.543, "dur": 1.757, + "args": { + "External id": 289717,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034961.500, "dur": 0.535, + "args": { + "External id": 289718,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034969.346, "dur": 0.462, + "args": { + "External id": 289719,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034973.046, "dur": 0.511, + "args": { + "External id": 289720,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034975.100, "dur": 0.576, + "args": { + "External id": 289721,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367034977.196, "dur": 2.547, + "args": { + "External id": 289722,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367034995.724, "dur": 24.763, + "args": { + "External id": 289723,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367035080.779, "dur": 358.453, + "args": { + "External id": 289724,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367035115.417, "dur": 318.366, + "args": { + "External id": 289725,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5257, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367035127.204, "dur": 298.956, + "args": { + "External id": 289726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367035464.554, "dur": 2.853, + "args": { + "External id": 289727,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5259, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070547, "tid": 2070547, + "ts": 5333367035558.693, "dur": 17288.609, + "args": { + "External id": 289728,"Record function id": 0, "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035708.201, "dur": 6.871, + "args": { + "External id": 289729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035719.856, "dur": 1.232, + "args": { + "External id": 289730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035723.363, "dur": 3.207, + "args": { + "External id": 289731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035728.774, "dur": 0.845, + "args": { + "External id": 289732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035731.173, "dur": 0.904, + "args": { + "External id": 289733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035733.456, "dur": 0.728, + "args": { + "External id": 289734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035738.354, "dur": 1.151, + "args": { + "External id": 289735,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035741.641, "dur": 2.756, + "args": { + "External id": 289736,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035745.892, "dur": 0.825, + "args": { + "External id": 289737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367035748.403, "dur": 0.836, + "args": { + "External id": 289738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367035770.597, "dur": 17002.721, + "args": { + "External id": 289739,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367035786.518, "dur": 16973.066, + "args": { + "External id": 289740,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367035807.597, "dur": 15.988, + "args": { + "External id": 289741,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367035827.285, "dur": 16881.207, + "args": { + "External id": 289742,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367035829.912, "dur": 16877.240, + "args": { + "External id": 289743,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367035836.639, "dur": 6.528, + "args": { + "External id": 289744,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367035844.932, "dur": 16855.407, + "args": { + "External id": 289745,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367053010.479, "dur": 36.429, + "args": { + "External id": 289746,"Sequence number": 1209169, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5278 + } + }, + { + "ph": "s", "id": 63, "pid": 2070547, "tid": 2070547, "ts": 5333367053010.479, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367053029.192, "dur": 12.583, + "args": { + "External id": 289747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367053034.334, "dur": 7.188, + "args": { + "External id": 289748,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367053117.599, "dur": 100.564, + "args": { + "External id": 289749,"Record function id": 0, "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367053221.445, "dur": 1154.098, + "args": { + "External id": 289750,"Record function id": 0, "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367053267.044, "dur": 1094.468, + "args": { + "External id": 289751,"Sequence number": 1209170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5283 + } + }, + { + "ph": "s", "id": 62, "pid": 2070547, "tid": 2070547, "ts": 5333367053267.044, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367053338.805, "dur": 49.313, + "args": { + "External id": 289752,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367053402.277, "dur": 107.395, + "args": { + "External id": 289753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367053520.991, "dur": 47.813, + "args": { + "External id": 289754,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367053578.149, "dur": 30.762, + "args": { + "External id": 289755,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367053678.763, "dur": 32.409, + "args": { + "External id": 289756,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367053731.487, "dur": 16.102, + "args": { + "External id": 289757,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367053769.470, "dur": 139.109, + "args": { + "External id": 289758,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367053823.862, "dur": 12.705, + "args": { + "External id": 289759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367053829.417, "dur": 6.321, + "args": { + "External id": 289760,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367053839.385, "dur": 5.723, + "args": { + "External id": 289761,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367053846.322, "dur": 1.585, + "args": { + "External id": 289762,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367053850.726, "dur": 5.183, + "args": { + "External id": 289763,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367053919.816, "dur": 53.492, + "args": { + "External id": 289764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367054004.393, "dur": 27.827, + "args": { + "External id": 289765,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367054041.176, "dur": 41.897, + "args": { + "External id": 289766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367054092.693, "dur": 34.890, + "args": { + "External id": 289767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367054148.809, "dur": 43.802, + "args": { + "External id": 289768,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367054200.522, "dur": 38.155, + "args": { + "External id": 289769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367054263.510, "dur": 22.025, + "args": { + "External id": 289770,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2070547, "tid": 2070547, + "ts": 5333367054443.574, "dur": 75.424, + "args": { + "External id": 289771,"Record function id": 0, "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367054598.995, "dur": 94.407, + "args": { + "External id": 289772,"Record function id": 0, "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2070547, "tid": 2070547, + "ts": 5333367054704.729, "dur": 18168.254, + "args": { + "External id": 289773,"Record function id": 0, "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070547, "tid": 2070547, + "ts": 5333367054712.872, "dur": 976.392, + "args": { + "External id": 289774,"Record function id": 0, "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367054796.686, "dur": 9.893, + "args": { + "External id": 289775,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367054819.815, "dur": 43.317, + "args": { + "External id": 289776,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054825.339, "dur": 2.166, + "args": { + "External id": 289777,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054832.852, "dur": 0.511, + "args": { + "External id": 289778,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054835.241, "dur": 0.588, + "args": { + "External id": 289779,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054837.872, "dur": 0.239, + "args": { + "External id": 289780,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054842.281, "dur": 0.670, + "args": { + "External id": 289781,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054844.561, "dur": 0.644, + "args": { + "External id": 289782,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054847.236, "dur": 3.993, + "args": { + "External id": 289783,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054852.899, "dur": 0.661, + "args": { + "External id": 289784,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367054855.060, "dur": 0.551, + "args": { + "External id": 289785,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367054878.191, "dur": 56.105, + "args": { + "External id": 289786,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367054971.431, "dur": 151.054, + "args": { + "External id": 289787,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367054983.724, "dur": 6.530, + "args": { + "External id": 289788,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367054995.386, "dur": 11.052, + "args": { + "External id": 289789,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367055000.099, "dur": 5.946, + "args": { + "External id": 289790,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055003.972, "dur": 0.657, + "args": { + "External id": 289791,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367055013.302, "dur": 55.530, + "args": { + "External id": 289792,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055042.597, "dur": 0.469, + "args": { + "External id": 289793,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055046.740, "dur": 0.315, + "args": { + "External id": 289794,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055048.112, "dur": 0.243, + "args": { + "External id": 289795,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055049.426, "dur": 3.412, + "args": { + "External id": 289796,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055054.003, "dur": 0.416, + "args": { + "External id": 289797,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055055.388, "dur": 0.143, + "args": { + "External id": 289798,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055059.631, "dur": 0.165, + "args": { + "External id": 289799,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055060.955, "dur": 0.140, + "args": { + "External id": 289800,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055062.448, "dur": 0.153, + "args": { + "External id": 289801,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367055088.181, "dur": 26.099, + "args": { + "External id": 289802,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367055195.478, "dur": 357.550, + "args": { + "External id": 289803,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367055230.576, "dur": 317.501, + "args": { + "External id": 289804,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5336, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367055246.393, "dur": 296.237, + "args": { + "External id": 289805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367055574.643, "dur": 2.478, + "args": { + "External id": 289806,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5338, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070547, "tid": 2070547, + "ts": 5333367055713.584, "dur": 16956.980, + "args": { + "External id": 289807,"Record function id": 0, "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055812.322, "dur": 6.799, + "args": { + "External id": 289808,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055823.207, "dur": 1.165, + "args": { + "External id": 289809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055826.488, "dur": 3.149, + "args": { + "External id": 289810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055831.703, "dur": 1.151, + "args": { + "External id": 289811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055834.513, "dur": 1.031, + "args": { + "External id": 289812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055836.944, "dur": 0.951, + "args": { + "External id": 289813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055841.800, "dur": 1.145, + "args": { + "External id": 289814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055844.413, "dur": 2.377, + "args": { + "External id": 289815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055848.524, "dur": 0.990, + "args": { + "External id": 289816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367055851.100, "dur": 1.061, + "args": { + "External id": 289817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367055872.615, "dur": 16721.766, + "args": { + "External id": 289818,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367055888.668, "dur": 16697.805, + "args": { + "External id": 289819,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367055909.258, "dur": 16.462, + "args": { + "External id": 289820,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367055929.964, "dur": 16621.283, + "args": { + "External id": 289821,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367055932.655, "dur": 16617.921, + "args": { + "External id": 289822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367055939.379, "dur": 6.414, + "args": { + "External id": 289823,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367055947.618, "dur": 16599.606, + "args": { + "External id": 289824,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367072815.339, "dur": 31.636, + "args": { + "External id": 289825,"Sequence number": 1209171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5357 + } + }, + { + "ph": "s", "id": 61, "pid": 2070547, "tid": 2070547, "ts": 5333367072815.339, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367072832.525, "dur": 9.509, + "args": { + "External id": 289826,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367072836.291, "dur": 5.504, + "args": { + "External id": 289827,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367072915.223, "dur": 87.691, + "args": { + "External id": 289828,"Record function id": 0, "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367073004.512, "dur": 1105.895, + "args": { + "External id": 289829,"Record function id": 0, "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367073047.373, "dur": 1049.095, + "args": { + "External id": 289830,"Sequence number": 1209172, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5362 + } + }, + { + "ph": "s", "id": 60, "pid": 2070547, "tid": 2070547, "ts": 5333367073047.373, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367073115.511, "dur": 46.886, + "args": { + "External id": 289831,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367073193.661, "dur": 105.402, + "args": { + "External id": 289832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367073311.214, "dur": 37.531, + "args": { + "External id": 289833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367073357.961, "dur": 30.894, + "args": { + "External id": 289834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367073417.957, "dur": 28.561, + "args": { + "External id": 289835,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367073464.052, "dur": 15.420, + "args": { + "External id": 289836,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367073499.411, "dur": 171.368, + "args": { + "External id": 289837,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367073551.203, "dur": 10.884, + "args": { + "External id": 289838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367073556.221, "dur": 5.080, + "args": { + "External id": 289839,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367073564.666, "dur": 5.426, + "args": { + "External id": 289840,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367073571.515, "dur": 1.189, + "args": { + "External id": 289841,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367073575.362, "dur": 5.145, + "args": { + "External id": 289842,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367073684.089, "dur": 55.438, + "args": { + "External id": 289843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367073771.543, "dur": 29.505, + "args": { + "External id": 289844,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367073811.150, "dur": 41.848, + "args": { + "External id": 289845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367073861.154, "dur": 34.806, + "args": { + "External id": 289846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367073917.900, "dur": 28.537, + "args": { + "External id": 289847,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367073952.012, "dur": 33.420, + "args": { + "External id": 289848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367074005.321, "dur": 19.219, + "args": { + "External id": 289849,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2070547, "tid": 2070547, + "ts": 5333367074194.265, "dur": 73.849, + "args": { + "External id": 289850,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367074346.105, "dur": 46.166, + "args": { + "External id": 289851,"Record function id": 0, "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2070547, "tid": 2070547, + "ts": 5333367074402.228, "dur": 18257.680, + "args": { + "External id": 289852,"Record function id": 0, "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070547, "tid": 2070547, + "ts": 5333367074411.479, "dur": 870.638, + "args": { + "External id": 289853,"Record function id": 0, "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367074490.393, "dur": 9.142, + "args": { + "External id": 289854,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367074512.864, "dur": 39.766, + "args": { + "External id": 289855,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074518.244, "dur": 2.374, + "args": { + "External id": 289856,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074525.016, "dur": 0.447, + "args": { + "External id": 289857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074527.015, "dur": 0.588, + "args": { + "External id": 289858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074528.985, "dur": 0.849, + "args": { + "External id": 289859,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074533.073, "dur": 0.692, + "args": { + "External id": 289860,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074534.934, "dur": 0.652, + "args": { + "External id": 289861,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074536.713, "dur": 4.207, + "args": { + "External id": 289862,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074542.412, "dur": 0.183, + "args": { + "External id": 289863,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074543.983, "dur": 0.641, + "args": { + "External id": 289864,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367074564.956, "dur": 44.160, + "args": { + "External id": 289865,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367074682.971, "dur": 125.517, + "args": { + "External id": 289866,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367074695.737, "dur": 6.166, + "args": { + "External id": 289867,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367074707.100, "dur": 11.734, + "args": { + "External id": 289868,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367074712.134, "dur": 6.282, + "args": { + "External id": 289869,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074715.852, "dur": 0.949, + "args": { + "External id": 289870,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367074726.034, "dur": 31.471, + "args": { + "External id": 289871,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074727.983, "dur": 2.977, + "args": { + "External id": 289872,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074732.463, "dur": 0.347, + "args": { + "External id": 289873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074734.469, "dur": 0.594, + "args": { + "External id": 289874,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074738.516, "dur": 1.252, + "args": { + "External id": 289875,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074741.019, "dur": 0.411, + "args": { + "External id": 289876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074742.541, "dur": 0.557, + "args": { + "External id": 289877,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074746.720, "dur": 0.187, + "args": { + "External id": 289878,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074747.894, "dur": 0.178, + "args": { + "External id": 289879,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367074749.337, "dur": 2.571, + "args": { + "External id": 289880,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367074770.277, "dur": 29.127, + "args": { + "External id": 289881,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367074862.901, "dur": 320.389, + "args": { + "External id": 289882,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367074897.869, "dur": 264.146, + "args": { + "External id": 289883,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5415, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367074909.114, "dur": 247.456, + "args": { + "External id": 289884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367075207.859, "dur": 3.425, + "args": { + "External id": 289885,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5417, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070547, "tid": 2070547, + "ts": 5333367075303.015, "dur": 17108.101, + "args": { + "External id": 289886,"Record function id": 0, "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075400.458, "dur": 6.085, + "args": { + "External id": 289887,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075410.429, "dur": 0.958, + "args": { + "External id": 289888,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075413.066, "dur": 1.687, + "args": { + "External id": 289889,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075416.538, "dur": 1.186, + "args": { + "External id": 289890,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075418.897, "dur": 1.343, + "args": { + "External id": 289891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075421.742, "dur": 0.890, + "args": { + "External id": 289892,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075426.258, "dur": 0.955, + "args": { + "External id": 289893,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075428.629, "dur": 2.359, + "args": { + "External id": 289894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075432.242, "dur": 0.809, + "args": { + "External id": 289895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367075434.708, "dur": 0.602, + "args": { + "External id": 289896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367075454.632, "dur": 16895.438, + "args": { + "External id": 289897,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367075469.708, "dur": 16870.524, + "args": { + "External id": 289898,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367075490.360, "dur": 16.568, + "args": { + "External id": 289899,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367075511.360, "dur": 16787.706, + "args": { + "External id": 289900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367075513.760, "dur": 16784.394, + "args": { + "External id": 289901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367075519.632, "dur": 5.867, + "args": { + "External id": 289902,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367075527.408, "dur": 16765.956, + "args": { + "External id": 289903,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367092566.181, "dur": 33.114, + "args": { + "External id": 289904,"Sequence number": 1209173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5436 + } + }, + { + "ph": "s", "id": 59, "pid": 2070547, "tid": 2070547, "ts": 5333367092566.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367092583.632, "dur": 10.655, + "args": { + "External id": 289905,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367092588.135, "dur": 5.946, + "args": { + "External id": 289906,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367092705.740, "dur": 85.600, + "args": { + "External id": 289907,"Record function id": 0, "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367092793.447, "dur": 1131.170, + "args": { + "External id": 289908,"Record function id": 0, "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367092838.643, "dur": 1071.762, + "args": { + "External id": 289909,"Sequence number": 1209174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5441 + } + }, + { + "ph": "s", "id": 58, "pid": 2070547, "tid": 2070547, "ts": 5333367092838.643, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367092909.637, "dur": 47.008, + "args": { + "External id": 289910,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367092969.997, "dur": 103.658, + "args": { + "External id": 289911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367093084.337, "dur": 37.554, + "args": { + "External id": 289912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367093131.390, "dur": 31.138, + "args": { + "External id": 289913,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367093208.185, "dur": 31.180, + "args": { + "External id": 289914,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367093259.102, "dur": 17.023, + "args": { + "External id": 289915,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367093296.537, "dur": 135.992, + "args": { + "External id": 289916,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367093350.428, "dur": 12.039, + "args": { + "External id": 289917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367093355.888, "dur": 5.742, + "args": { + "External id": 289918,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367093365.144, "dur": 5.326, + "args": { + "External id": 289919,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367093372.197, "dur": 1.050, + "args": { + "External id": 289920,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367093375.420, "dur": 3.765, + "args": { + "External id": 289921,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367093443.685, "dur": 50.033, + "args": { + "External id": 289922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367093524.497, "dur": 29.207, + "args": { + "External id": 289923,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367093562.371, "dur": 42.484, + "args": { + "External id": 289924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367093656.909, "dur": 41.111, + "args": { + "External id": 289925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367093730.201, "dur": 29.302, + "args": { + "External id": 289926,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367093768.456, "dur": 34.485, + "args": { + "External id": 289927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367093820.039, "dur": 22.139, + "args": { + "External id": 289928,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2070547, "tid": 2070547, + "ts": 5333367093991.369, "dur": 76.534, + "args": { + "External id": 289929,"Record function id": 0, "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367094144.762, "dur": 67.103, + "args": { + "External id": 289930,"Record function id": 0, "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2070547, "tid": 2070547, + "ts": 5333367094221.921, "dur": 18073.694, + "args": { + "External id": 289931,"Record function id": 0, "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070547, "tid": 2070547, + "ts": 5333367094230.558, "dur": 847.068, + "args": { + "External id": 289932,"Record function id": 0, "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367094312.749, "dur": 10.117, + "args": { + "External id": 289933,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367094335.910, "dur": 37.834, + "args": { + "External id": 289934,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094341.133, "dur": 2.247, + "args": { + "External id": 289935,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094347.927, "dur": 0.235, + "args": { + "External id": 289936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094349.883, "dur": 0.626, + "args": { + "External id": 289937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094351.873, "dur": 0.601, + "args": { + "External id": 289938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094355.781, "dur": 0.209, + "args": { + "External id": 289939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094357.347, "dur": 0.597, + "args": { + "External id": 289940,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094359.164, "dur": 3.793, + "args": { + "External id": 289941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094364.063, "dur": 0.478, + "args": { + "External id": 289942,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094366.001, "dur": 0.191, + "args": { + "External id": 289943,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367094385.407, "dur": 46.478, + "args": { + "External id": 289944,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367094465.159, "dur": 115.498, + "args": { + "External id": 289945,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367094477.397, "dur": 3.943, + "args": { + "External id": 289946,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367094486.079, "dur": 12.877, + "args": { + "External id": 289947,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367094490.910, "dur": 7.645, + "args": { + "External id": 289948,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094496.302, "dur": 0.797, + "args": { + "External id": 289949,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367094505.927, "dur": 30.322, + "args": { + "External id": 289950,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094508.213, "dur": 3.099, + "args": { + "External id": 289951,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094512.768, "dur": 0.418, + "args": { + "External id": 289952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094514.087, "dur": 0.189, + "args": { + "External id": 289953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094517.522, "dur": 1.305, + "args": { + "External id": 289954,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094520.063, "dur": 0.740, + "args": { + "External id": 289955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094522.193, "dur": 0.396, + "args": { + "External id": 289956,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094525.480, "dur": 0.737, + "args": { + "External id": 289957,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094527.464, "dur": 0.358, + "args": { + "External id": 289958,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367094528.820, "dur": 2.684, + "args": { + "External id": 289959,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367094548.910, "dur": 24.240, + "args": { + "External id": 289960,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367094675.310, "dur": 310.050, + "args": { + "External id": 289961,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367094712.407, "dur": 268.277, + "args": { + "External id": 289962,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5494, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367094724.004, "dur": 251.051, + "args": { + "External id": 289963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367095006.861, "dur": 2.561, + "args": { + "External id": 289964,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5496, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070547, "tid": 2070547, + "ts": 5333367095097.604, "dur": 16988.964, + "args": { + "External id": 289965,"Record function id": 0, "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095211.894, "dur": 6.744, + "args": { + "External id": 289966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095223.052, "dur": 0.946, + "args": { + "External id": 289967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095226.271, "dur": 2.273, + "args": { + "External id": 289968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095230.585, "dur": 0.705, + "args": { + "External id": 289969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095232.847, "dur": 1.189, + "args": { + "External id": 289970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095235.301, "dur": 0.928, + "args": { + "External id": 289971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095240.254, "dur": 1.228, + "args": { + "External id": 289972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095242.912, "dur": 2.669, + "args": { + "External id": 289973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095247.237, "dur": 0.837, + "args": { + "External id": 289974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367095249.436, "dur": 0.647, + "args": { + "External id": 289975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367095270.409, "dur": 16769.909, + "args": { + "External id": 289976,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367095285.854, "dur": 16746.864, + "args": { + "External id": 289977,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367095305.095, "dur": 15.663, + "args": { + "External id": 289978,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367095324.424, "dur": 16673.080, + "args": { + "External id": 289979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367095326.878, "dur": 16670.063, + "args": { + "External id": 289980,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367095333.462, "dur": 6.928, + "args": { + "External id": 289981,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367095342.162, "dur": 16651.739, + "args": { + "External id": 289982,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367112236.071, "dur": 33.639, + "args": { + "External id": 289983,"Sequence number": 1209175, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5515 + } + }, + { + "ph": "s", "id": 57, "pid": 2070547, "tid": 2070547, "ts": 5333367112236.071, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367112255.188, "dur": 9.877, + "args": { + "External id": 289984,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367112258.873, "dur": 5.738, + "args": { + "External id": 289985,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367112336.696, "dur": 86.733, + "args": { + "External id": 289986,"Record function id": 0, "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367112424.942, "dur": 1102.215, + "args": { + "External id": 289987,"Record function id": 0, "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367112469.345, "dur": 1042.797, + "args": { + "External id": 289988,"Sequence number": 1209176, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5520 + } + }, + { + "ph": "s", "id": 56, "pid": 2070547, "tid": 2070547, "ts": 5333367112469.345, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367112539.892, "dur": 47.413, + "args": { + "External id": 289989,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367112600.267, "dur": 131.457, + "args": { + "External id": 289990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367112745.457, "dur": 40.431, + "args": { + "External id": 289991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367112794.407, "dur": 30.673, + "args": { + "External id": 289992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367112854.463, "dur": 30.603, + "args": { + "External id": 289993,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367112902.281, "dur": 15.944, + "args": { + "External id": 289994,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367112937.691, "dur": 135.876, + "args": { + "External id": 289995,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367112992.309, "dur": 11.020, + "args": { + "External id": 289996,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367112997.353, "dur": 5.232, + "args": { + "External id": 289997,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367113006.209, "dur": 5.991, + "args": { + "External id": 289998,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367113013.708, "dur": 1.192, + "args": { + "External id": 289999,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367113017.245, "dur": 5.303, + "args": { + "External id": 290000,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367113084.170, "dur": 48.230, + "args": { + "External id": 290001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367113162.243, "dur": 45.913, + "args": { + "External id": 290002,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367113220.018, "dur": 45.033, + "args": { + "External id": 290003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367113274.250, "dur": 34.402, + "args": { + "External id": 290004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367113332.851, "dur": 27.413, + "args": { + "External id": 290005,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367113366.050, "dur": 33.227, + "args": { + "External id": 290006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367113419.523, "dur": 19.872, + "args": { + "External id": 290007,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2070547, "tid": 2070547, + "ts": 5333367113592.978, "dur": 114.166, + "args": { + "External id": 290008,"Record function id": 0, "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367113786.604, "dur": 49.510, + "args": { + "External id": 290009,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2070547, "tid": 2070547, + "ts": 5333367113845.070, "dur": 18228.250, + "args": { + "External id": 290010,"Record function id": 0, "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070547, "tid": 2070547, + "ts": 5333367113852.050, "dur": 871.464, + "args": { + "External id": 290011,"Record function id": 0, "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367113933.447, "dur": 9.578, + "args": { + "External id": 290012,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367113955.716, "dur": 36.133, + "args": { + "External id": 290013,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113961.235, "dur": 2.544, + "args": { + "External id": 290014,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113968.076, "dur": 0.257, + "args": { + "External id": 290015,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113969.442, "dur": 0.595, + "args": { + "External id": 290016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113971.102, "dur": 0.176, + "args": { + "External id": 290017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113974.642, "dur": 0.460, + "args": { + "External id": 290018,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113975.904, "dur": 1.002, + "args": { + "External id": 290019,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113977.836, "dur": 4.024, + "args": { + "External id": 290020,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113982.800, "dur": 0.374, + "args": { + "External id": 290021,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367113983.969, "dur": 0.435, + "args": { + "External id": 290022,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367114002.877, "dur": 43.528, + "args": { + "External id": 290023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367114079.677, "dur": 139.847, + "args": { + "External id": 290024,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367114091.841, "dur": 3.454, + "args": { + "External id": 290025,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367114100.604, "dur": 10.604, + "args": { + "External id": 290026,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367114104.999, "dur": 5.825, + "args": { + "External id": 290027,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114108.980, "dur": 0.520, + "args": { + "External id": 290028,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367114117.779, "dur": 35.357, + "args": { + "External id": 290029,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114120.530, "dur": 2.962, + "args": { + "External id": 290030,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114125.316, "dur": 0.431, + "args": { + "External id": 290031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114127.079, "dur": 0.299, + "args": { + "External id": 290032,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114130.796, "dur": 1.887, + "args": { + "External id": 290033,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114134.321, "dur": 0.182, + "args": { + "External id": 290034,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114136.319, "dur": 0.155, + "args": { + "External id": 290035,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114140.234, "dur": 0.503, + "args": { + "External id": 290036,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114142.022, "dur": 0.523, + "args": { + "External id": 290037,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114144.187, "dur": 2.806, + "args": { + "External id": 290038,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367114164.365, "dur": 44.968, + "args": { + "External id": 290039,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367114274.629, "dur": 315.423, + "args": { + "External id": 290040,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367114310.613, "dur": 274.300, + "args": { + "External id": 290041,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5573, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367114321.342, "dur": 258.202, + "args": { + "External id": 290042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367114612.723, "dur": 2.380, + "args": { + "External id": 290043,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5575, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070547, "tid": 2070547, + "ts": 5333367114744.822, "dur": 17131.028, + "args": { + "External id": 290044,"Record function id": 0, "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114840.817, "dur": 6.560, + "args": { + "External id": 290045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114850.453, "dur": 1.545, + "args": { + "External id": 290046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114853.844, "dur": 2.338, + "args": { + "External id": 290047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114857.945, "dur": 1.247, + "args": { + "External id": 290048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114860.352, "dur": 1.078, + "args": { + "External id": 290049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114862.789, "dur": 0.971, + "args": { + "External id": 290050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114867.736, "dur": 0.996, + "args": { + "External id": 290051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114870.492, "dur": 3.796, + "args": { + "External id": 290052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114875.881, "dur": 1.084, + "args": { + "External id": 290053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367114878.878, "dur": 0.856, + "args": { + "External id": 290054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367114899.858, "dur": 16930.364, + "args": { + "External id": 290055,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367114915.715, "dur": 16906.087, + "args": { + "External id": 290056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367114935.748, "dur": 14.444, + "args": { + "External id": 290057,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367114954.546, "dur": 16831.426, + "args": { + "External id": 290058,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367114957.341, "dur": 16827.877, + "args": { + "External id": 290059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367114963.473, "dur": 5.189, + "args": { + "External id": 290060,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367114970.403, "dur": 16811.765, + "args": { + "External id": 290061,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367132013.195, "dur": 35.190, + "args": { + "External id": 290062,"Sequence number": 1209177, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5594 + } + }, + { + "ph": "s", "id": 55, "pid": 2070547, "tid": 2070547, "ts": 5333367132013.195, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367132034.704, "dur": 9.134, + "args": { + "External id": 290063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367132038.668, "dur": 4.936, + "args": { + "External id": 290064,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367132112.969, "dur": 99.349, + "args": { + "External id": 290065,"Record function id": 0, "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367132215.173, "dur": 1110.571, + "args": { + "External id": 290066,"Record function id": 0, "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367132257.954, "dur": 1053.889, + "args": { + "External id": 290067,"Sequence number": 1209178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5599 + } + }, + { + "ph": "s", "id": 54, "pid": 2070547, "tid": 2070547, "ts": 5333367132257.954, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367132325.358, "dur": 46.784, + "args": { + "External id": 290068,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367132384.272, "dur": 105.909, + "args": { + "External id": 290069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367132500.994, "dur": 37.993, + "args": { + "External id": 290070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367132547.087, "dur": 30.626, + "args": { + "External id": 290071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367132602.818, "dur": 67.491, + "args": { + "External id": 290072,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367132695.308, "dur": 18.346, + "args": { + "External id": 290073,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367132734.736, "dur": 139.975, + "args": { + "External id": 290074,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367132792.066, "dur": 11.951, + "args": { + "External id": 290075,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367132797.210, "dur": 6.044, + "args": { + "External id": 290076,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367132806.394, "dur": 6.403, + "args": { + "External id": 290077,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367132814.104, "dur": 1.257, + "args": { + "External id": 290078,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367132817.636, "dur": 4.510, + "args": { + "External id": 290079,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367132885.533, "dur": 52.203, + "args": { + "External id": 290080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367132968.717, "dur": 30.763, + "args": { + "External id": 290081,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367133008.168, "dur": 41.227, + "args": { + "External id": 290082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367133059.077, "dur": 33.952, + "args": { + "External id": 290083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367133112.619, "dur": 27.491, + "args": { + "External id": 290084,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367133145.351, "dur": 50.936, + "args": { + "External id": 290085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367133222.595, "dur": 19.698, + "args": { + "External id": 290086,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2070547, "tid": 2070547, + "ts": 5333367133389.980, "dur": 72.589, + "args": { + "External id": 290087,"Record function id": 0, "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367133536.202, "dur": 46.817, + "args": { + "External id": 290088,"Record function id": 0, "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2070547, "tid": 2070547, + "ts": 5333367133591.916, "dur": 18379.100, + "args": { + "External id": 290089,"Record function id": 0, "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070547, "tid": 2070547, + "ts": 5333367133599.714, "dur": 879.494, + "args": { + "External id": 290090,"Record function id": 0, "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367133722.332, "dur": 10.287, + "args": { + "External id": 290091,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367133747.146, "dur": 43.764, + "args": { + "External id": 290092,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133753.256, "dur": 2.582, + "args": { + "External id": 290093,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133760.789, "dur": 0.570, + "args": { + "External id": 290094,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133762.886, "dur": 0.264, + "args": { + "External id": 290095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133765.096, "dur": 0.547, + "args": { + "External id": 290096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133769.089, "dur": 0.361, + "args": { + "External id": 290097,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133770.809, "dur": 0.561, + "args": { + "External id": 290098,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133773.065, "dur": 4.349, + "args": { + "External id": 290099,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133779.050, "dur": 0.529, + "args": { + "External id": 290100,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133781.364, "dur": 0.610, + "args": { + "External id": 290101,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367133802.833, "dur": 44.603, + "args": { + "External id": 290102,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367133881.774, "dur": 123.706, + "args": { + "External id": 290103,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367133893.174, "dur": 3.389, + "args": { + "External id": 290104,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367133901.785, "dur": 10.781, + "args": { + "External id": 290105,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367133906.515, "dur": 5.643, + "args": { + "External id": 290106,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133910.252, "dur": 0.680, + "args": { + "External id": 290107,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367133919.431, "dur": 37.415, + "args": { + "External id": 290108,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133921.752, "dur": 2.626, + "args": { + "External id": 290109,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133925.936, "dur": 0.417, + "args": { + "External id": 290110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133927.955, "dur": 0.434, + "args": { + "External id": 290111,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133932.284, "dur": 1.912, + "args": { + "External id": 290112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133938.925, "dur": 0.358, + "args": { + "External id": 290113,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133941.224, "dur": 0.406, + "args": { + "External id": 290114,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133945.054, "dur": 0.207, + "args": { + "External id": 290115,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133946.504, "dur": 0.338, + "args": { + "External id": 290116,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367133948.395, "dur": 2.808, + "args": { + "External id": 290117,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367133970.123, "dur": 26.338, + "args": { + "External id": 290118,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367134059.010, "dur": 323.836, + "args": { + "External id": 290119,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367134094.694, "dur": 282.920, + "args": { + "External id": 290120,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5652, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367134105.292, "dur": 266.645, + "args": { + "External id": 290121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367134406.900, "dur": 2.358, + "args": { + "External id": 290122,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5654, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070547, "tid": 2070547, + "ts": 5333367134500.122, "dur": 17247.158, + "args": { + "External id": 290123,"Record function id": 0, "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134596.041, "dur": 6.402, + "args": { + "External id": 290124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134605.614, "dur": 1.351, + "args": { + "External id": 290125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134608.566, "dur": 2.970, + "args": { + "External id": 290126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134613.149, "dur": 1.083, + "args": { + "External id": 290127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134615.719, "dur": 1.042, + "args": { + "External id": 290128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134653.594, "dur": 2.004, + "args": { + "External id": 290129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134662.171, "dur": 1.478, + "args": { + "External id": 290130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134665.379, "dur": 2.972, + "args": { + "External id": 290131,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134670.211, "dur": 0.923, + "args": { + "External id": 290132,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367134672.682, "dur": 0.790, + "args": { + "External id": 290133,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367134695.374, "dur": 16985.802, + "args": { + "External id": 290134,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367134712.598, "dur": 16955.820, + "args": { + "External id": 290135,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367134735.613, "dur": 15.517, + "args": { + "External id": 290136,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367134754.997, "dur": 16843.482, + "args": { + "External id": 290137,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367134757.673, "dur": 16839.975, + "args": { + "External id": 290138,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367134765.002, "dur": 6.448, + "args": { + "External id": 290139,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367134773.270, "dur": 16818.996, + "args": { + "External id": 290140,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367151903.576, "dur": 40.347, + "args": { + "External id": 290141,"Sequence number": 1209179, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5673 + } + }, + { + "ph": "s", "id": 53, "pid": 2070547, "tid": 2070547, "ts": 5333367151903.576, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367151926.347, "dur": 12.463, + "args": { + "External id": 290142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367151931.527, "dur": 6.947, + "args": { + "External id": 290143,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367152015.045, "dur": 87.870, + "args": { + "External id": 290144,"Record function id": 0, "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367152104.406, "dur": 1194.139, + "args": { + "External id": 290145,"Record function id": 0, "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367152146.379, "dur": 1133.339, + "args": { + "External id": 290146,"Sequence number": 1209180, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5678 + } + }, + { + "ph": "s", "id": 52, "pid": 2070547, "tid": 2070547, "ts": 5333367152146.379, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367152246.378, "dur": 61.322, + "args": { + "External id": 290147,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367152324.435, "dur": 105.439, + "args": { + "External id": 290148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367152440.277, "dur": 38.177, + "args": { + "External id": 290149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367152486.666, "dur": 31.016, + "args": { + "External id": 290150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367152545.916, "dur": 28.881, + "args": { + "External id": 290151,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367152593.299, "dur": 15.263, + "args": { + "External id": 290152,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367152675.771, "dur": 142.603, + "args": { + "External id": 290153,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367152733.979, "dur": 12.981, + "args": { + "External id": 290154,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367152739.471, "dur": 6.548, + "args": { + "External id": 290155,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367152750.053, "dur": 6.248, + "args": { + "External id": 290156,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367152757.844, "dur": 1.073, + "args": { + "External id": 290157,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367152761.401, "dur": 4.186, + "args": { + "External id": 290158,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367152829.748, "dur": 53.590, + "args": { + "External id": 290159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367152916.472, "dur": 32.540, + "args": { + "External id": 290160,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367152958.214, "dur": 40.335, + "args": { + "External id": 290161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367153007.498, "dur": 34.211, + "args": { + "External id": 290162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367153062.631, "dur": 27.414, + "args": { + "External id": 290163,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367153095.561, "dur": 33.372, + "args": { + "External id": 290164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367153151.122, "dur": 39.933, + "args": { + "External id": 290165,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2070547, "tid": 2070547, + "ts": 5333367153377.015, "dur": 76.531, + "args": { + "External id": 290166,"Record function id": 0, "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367153531.482, "dur": 45.164, + "args": { + "External id": 290167,"Record function id": 0, "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2070547, "tid": 2070547, + "ts": 5333367153585.567, "dur": 18229.787, + "args": { + "External id": 290168,"Record function id": 0, "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070547, "tid": 2070547, + "ts": 5333367153592.965, "dur": 973.149, + "args": { + "External id": 290169,"Record function id": 0, "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367153717.520, "dur": 10.848, + "args": { + "External id": 290170,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367153742.441, "dur": 41.775, + "args": { + "External id": 290171,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153748.566, "dur": 2.274, + "args": { + "External id": 290172,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153755.887, "dur": 0.519, + "args": { + "External id": 290173,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153758.216, "dur": 0.404, + "args": { + "External id": 290174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153760.376, "dur": 0.405, + "args": { + "External id": 290175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153764.439, "dur": 0.366, + "args": { + "External id": 290176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153766.205, "dur": 0.681, + "args": { + "External id": 290177,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153768.322, "dur": 3.835, + "args": { + "External id": 290178,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153773.750, "dur": 0.350, + "args": { + "External id": 290179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153775.960, "dur": 0.327, + "args": { + "External id": 290180,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367153797.195, "dur": 49.900, + "args": { + "External id": 290181,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367153881.592, "dur": 118.523, + "args": { + "External id": 290182,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367153894.160, "dur": 3.863, + "args": { + "External id": 290183,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367153902.778, "dur": 10.559, + "args": { + "External id": 290184,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367153907.348, "dur": 5.579, + "args": { + "External id": 290185,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153911.033, "dur": 0.656, + "args": { + "External id": 290186,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367153920.783, "dur": 34.256, + "args": { + "External id": 290187,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153923.125, "dur": 2.526, + "args": { + "External id": 290188,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153927.373, "dur": 0.682, + "args": { + "External id": 290189,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153929.254, "dur": 0.578, + "args": { + "External id": 290190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153934.082, "dur": 2.010, + "args": { + "External id": 290191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153937.502, "dur": 0.133, + "args": { + "External id": 290192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153939.075, "dur": 0.300, + "args": { + "External id": 290193,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153943.765, "dur": 0.188, + "args": { + "External id": 290194,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153945.343, "dur": 0.520, + "args": { + "External id": 290195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367153947.100, "dur": 2.286, + "args": { + "External id": 290196,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367153966.761, "dur": 25.135, + "args": { + "External id": 290197,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367154053.701, "dur": 398.627, + "args": { + "External id": 290198,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367154087.731, "dur": 358.102, + "args": { + "External id": 290199,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5731, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367154098.339, "dur": 340.409, + "args": { + "External id": 290200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367154481.052, "dur": 3.359, + "args": { + "External id": 290201,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5733, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070547, "tid": 2070547, + "ts": 5333367154587.524, "dur": 16968.828, + "args": { + "External id": 290202,"Record function id": 0, "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154733.610, "dur": 7.724, + "args": { + "External id": 290203,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154744.993, "dur": 1.339, + "args": { + "External id": 290204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154748.126, "dur": 2.832, + "args": { + "External id": 290205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154752.504, "dur": 1.345, + "args": { + "External id": 290206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154755.232, "dur": 1.161, + "args": { + "External id": 290207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154757.863, "dur": 1.138, + "args": { + "External id": 290208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154762.412, "dur": 1.421, + "args": { + "External id": 290209,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154765.380, "dur": 2.544, + "args": { + "External id": 290210,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154769.384, "dur": 0.958, + "args": { + "External id": 290211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367154771.979, "dur": 0.847, + "args": { + "External id": 290212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367154794.658, "dur": 16705.514, + "args": { + "External id": 290213,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367154811.378, "dur": 16678.142, + "args": { + "External id": 290214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367154837.272, "dur": 15.587, + "args": { + "External id": 290215,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367154856.802, "dur": 16591.538, + "args": { + "External id": 290216,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367154859.494, "dur": 16587.933, + "args": { + "External id": 290217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367154866.852, "dur": 5.743, + "args": { + "External id": 290218,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367154874.431, "dur": 16568.637, + "args": { + "External id": 290219,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367171750.773, "dur": 38.324, + "args": { + "External id": 290220,"Sequence number": 1209181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5752 + } + }, + { + "ph": "s", "id": 51, "pid": 2070547, "tid": 2070547, "ts": 5333367171750.773, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367171772.327, "dur": 11.845, + "args": { + "External id": 290221,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367171777.548, "dur": 6.193, + "args": { + "External id": 290222,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367171857.550, "dur": 86.420, + "args": { + "External id": 290223,"Record function id": 0, "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367171945.728, "dur": 1168.350, + "args": { + "External id": 290224,"Record function id": 0, "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367171988.898, "dur": 1110.705, + "args": { + "External id": 290225,"Sequence number": 1209182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5757 + } + }, + { + "ph": "s", "id": 50, "pid": 2070547, "tid": 2070547, "ts": 5333367171988.898, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367172057.868, "dur": 46.619, + "args": { + "External id": 290226,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367172117.343, "dur": 127.034, + "args": { + "External id": 290227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367172265.197, "dur": 53.495, + "args": { + "External id": 290228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367172328.808, "dur": 31.512, + "args": { + "External id": 290229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367172395.557, "dur": 33.904, + "args": { + "External id": 290230,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367172446.994, "dur": 16.257, + "args": { + "External id": 290231,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367172483.953, "dur": 182.088, + "args": { + "External id": 290232,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367172541.550, "dur": 12.960, + "args": { + "External id": 290233,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367172547.683, "dur": 6.025, + "args": { + "External id": 290234,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367172557.640, "dur": 5.195, + "args": { + "External id": 290235,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367172564.330, "dur": 1.091, + "args": { + "External id": 290236,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367172568.143, "dur": 5.467, + "args": { + "External id": 290237,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367172680.760, "dur": 60.656, + "args": { + "External id": 290238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367172777.101, "dur": 31.744, + "args": { + "External id": 290239,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367172818.348, "dur": 42.388, + "args": { + "External id": 290240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367172867.119, "dur": 33.862, + "args": { + "External id": 290241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367172924.132, "dur": 27.972, + "args": { + "External id": 290242,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367172959.917, "dur": 35.856, + "args": { + "External id": 290243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367173012.705, "dur": 19.285, + "args": { + "External id": 290244,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2070547, "tid": 2070547, + "ts": 5333367173203.938, "dur": 101.730, + "args": { + "External id": 290245,"Record function id": 0, "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367173396.641, "dur": 50.108, + "args": { + "External id": 290246,"Record function id": 0, "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2070547, "tid": 2070547, + "ts": 5333367173456.316, "dur": 18396.144, + "args": { + "External id": 290247,"Record function id": 0, "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070547, "tid": 2070547, + "ts": 5333367173463.838, "dur": 981.079, + "args": { + "External id": 290248,"Record function id": 0, "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367173548.444, "dur": 10.642, + "args": { + "External id": 290249,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367173572.715, "dur": 43.399, + "args": { + "External id": 290250,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173578.161, "dur": 2.786, + "args": { + "External id": 290251,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173585.961, "dur": 0.710, + "args": { + "External id": 290252,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173588.584, "dur": 0.570, + "args": { + "External id": 290253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173590.830, "dur": 0.450, + "args": { + "External id": 290254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173594.773, "dur": 0.397, + "args": { + "External id": 290255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173596.646, "dur": 0.643, + "args": { + "External id": 290256,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173598.746, "dur": 4.775, + "args": { + "External id": 290257,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173605.146, "dur": 0.400, + "args": { + "External id": 290258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173607.106, "dur": 0.476, + "args": { + "External id": 290259,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367173666.173, "dur": 50.181, + "args": { + "External id": 290260,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367173753.343, "dur": 131.627, + "args": { + "External id": 290261,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367173767.469, "dur": 5.228, + "args": { + "External id": 290262,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367173777.804, "dur": 12.079, + "args": { + "External id": 290263,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367173782.976, "dur": 6.491, + "args": { + "External id": 290264,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173787.113, "dur": 0.696, + "args": { + "External id": 290265,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367173797.600, "dur": 33.934, + "args": { + "External id": 290266,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173800.301, "dur": 2.417, + "args": { + "External id": 290267,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173804.238, "dur": 0.555, + "args": { + "External id": 290268,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173806.029, "dur": 0.513, + "args": { + "External id": 290269,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173809.935, "dur": 2.510, + "args": { + "External id": 290270,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173813.926, "dur": 0.160, + "args": { + "External id": 290271,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173815.459, "dur": 0.360, + "args": { + "External id": 290272,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173819.425, "dur": 0.420, + "args": { + "External id": 290273,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173821.272, "dur": 0.366, + "args": { + "External id": 290274,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367173823.101, "dur": 2.372, + "args": { + "External id": 290275,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367173850.069, "dur": 26.614, + "args": { + "External id": 290276,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367173941.203, "dur": 390.369, + "args": { + "External id": 290277,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367173987.239, "dur": 337.694, + "args": { + "External id": 290278,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5810, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367173998.134, "dur": 318.196, + "args": { + "External id": 290279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367174361.050, "dur": 2.561, + "args": { + "External id": 290280,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5812, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070547, "tid": 2070547, + "ts": 5333367174467.819, "dur": 17112.280, + "args": { + "External id": 290281,"Record function id": 0, "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174571.550, "dur": 6.892, + "args": { + "External id": 290282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174582.289, "dur": 1.028, + "args": { + "External id": 290283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174585.180, "dur": 3.405, + "args": { + "External id": 290284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174590.040, "dur": 1.235, + "args": { + "External id": 290285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174592.797, "dur": 1.267, + "args": { + "External id": 290286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174597.734, "dur": 1.058, + "args": { + "External id": 290287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174600.447, "dur": 1.004, + "args": { + "External id": 290288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174602.892, "dur": 2.572, + "args": { + "External id": 290289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174607.157, "dur": 0.578, + "args": { + "External id": 290290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367174611.395, "dur": 0.925, + "args": { + "External id": 290291,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367174670.410, "dur": 16848.169, + "args": { + "External id": 290292,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367174689.008, "dur": 16818.440, + "args": { + "External id": 290293,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367174709.931, "dur": 17.095, + "args": { + "External id": 290294,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367174731.369, "dur": 16728.959, + "args": { + "External id": 290295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367174733.904, "dur": 16725.473, + "args": { + "External id": 290296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367174739.998, "dur": 7.106, + "args": { + "External id": 290297,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367174748.950, "dur": 16705.969, + "args": { + "External id": 290298,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367191782.192, "dur": 42.495, + "args": { + "External id": 290299,"Sequence number": 1209183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5831 + } + }, + { + "ph": "s", "id": 49, "pid": 2070547, "tid": 2070547, "ts": 5333367191782.192, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367191807.242, "dur": 12.415, + "args": { + "External id": 290300,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367191812.661, "dur": 6.620, + "args": { + "External id": 290301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367191894.000, "dur": 85.533, + "args": { + "External id": 290302,"Record function id": 0, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367191981.050, "dur": 1174.716, + "args": { + "External id": 290303,"Record function id": 0, "Ev Idx": 5835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367192022.087, "dur": 1118.802, + "args": { + "External id": 290304,"Sequence number": 1209184, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5836 + } + }, + { + "ph": "s", "id": 48, "pid": 2070547, "tid": 2070547, "ts": 5333367192022.087, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367192125.874, "dur": 72.036, + "args": { + "External id": 290305,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367192220.841, "dur": 114.479, + "args": { + "External id": 290306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367192346.589, "dur": 39.224, + "args": { + "External id": 290307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367192391.413, "dur": 30.203, + "args": { + "External id": 290308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367192454.908, "dur": 31.099, + "args": { + "External id": 290309,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367192501.119, "dur": 17.893, + "args": { + "External id": 290310,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367192537.963, "dur": 186.559, + "args": { + "External id": 290311,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367192591.890, "dur": 12.407, + "args": { + "External id": 290312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367192597.870, "dur": 5.705, + "args": { + "External id": 290313,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367192607.787, "dur": 5.558, + "args": { + "External id": 290314,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367192614.811, "dur": 3.141, + "args": { + "External id": 290315,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367192659.614, "dur": 7.452, + "args": { + "External id": 290316,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367192737.130, "dur": 53.275, + "args": { + "External id": 290317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367192820.654, "dur": 30.645, + "args": { + "External id": 290318,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367192859.483, "dur": 40.837, + "args": { + "External id": 290319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367192908.176, "dur": 34.669, + "args": { + "External id": 290320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367192967.366, "dur": 28.679, + "args": { + "External id": 290321,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367193001.710, "dur": 33.989, + "args": { + "External id": 290322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367193053.877, "dur": 21.800, + "args": { + "External id": 290323,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2070547, "tid": 2070547, + "ts": 5333367193251.713, "dur": 96.877, + "args": { + "External id": 290324,"Record function id": 0, "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367193432.885, "dur": 48.359, + "args": { + "External id": 290325,"Record function id": 0, "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2070547, "tid": 2070547, + "ts": 5333367193491.161, "dur": 18409.388, + "args": { + "External id": 290326,"Record function id": 0, "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070547, "tid": 2070547, + "ts": 5333367193500.092, "dur": 939.456, + "args": { + "External id": 290327,"Record function id": 0, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367193585.212, "dur": 10.547, + "args": { + "External id": 290328,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367193609.251, "dur": 81.922, + "args": { + "External id": 290329,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193614.818, "dur": 2.694, + "args": { + "External id": 290330,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193662.040, "dur": 0.585, + "args": { + "External id": 290331,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193664.962, "dur": 0.655, + "args": { + "External id": 290332,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193667.259, "dur": 0.404, + "args": { + "External id": 290333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193671.405, "dur": 0.253, + "args": { + "External id": 290334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193673.166, "dur": 0.610, + "args": { + "External id": 290335,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193675.660, "dur": 4.202, + "args": { + "External id": 290336,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193681.366, "dur": 0.377, + "args": { + "External id": 290337,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193683.560, "dur": 0.331, + "args": { + "External id": 290338,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367193704.282, "dur": 51.075, + "args": { + "External id": 290339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367193791.819, "dur": 124.361, + "args": { + "External id": 290340,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367193805.217, "dur": 5.710, + "args": { + "External id": 290341,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367193816.068, "dur": 11.142, + "args": { + "External id": 290342,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367193820.563, "dur": 6.220, + "args": { + "External id": 290343,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193824.757, "dur": 0.744, + "args": { + "External id": 290344,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367193835.256, "dur": 33.985, + "args": { + "External id": 290345,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193837.628, "dur": 2.589, + "args": { + "External id": 290346,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193841.848, "dur": 0.437, + "args": { + "External id": 290347,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193843.774, "dur": 0.402, + "args": { + "External id": 290348,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193848.461, "dur": 1.500, + "args": { + "External id": 290349,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193851.412, "dur": 0.161, + "args": { + "External id": 290350,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193853.621, "dur": 0.369, + "args": { + "External id": 290351,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193857.021, "dur": 0.449, + "args": { + "External id": 290352,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193858.936, "dur": 0.331, + "args": { + "External id": 290353,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367193860.987, "dur": 2.313, + "args": { + "External id": 290354,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367193881.074, "dur": 26.223, + "args": { + "External id": 290355,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367193972.781, "dur": 356.438, + "args": { + "External id": 290356,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367194007.405, "dur": 315.861, + "args": { + "External id": 290357,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5889, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367194018.563, "dur": 294.557, + "args": { + "External id": 290358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367194358.230, "dur": 2.992, + "args": { + "External id": 290359,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5891, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070547, "tid": 2070547, + "ts": 5333367194461.454, "dur": 17223.833, + "args": { + "External id": 290360,"Record function id": 0, "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194565.650, "dur": 6.517, + "args": { + "External id": 290361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194575.557, "dur": 1.366, + "args": { + "External id": 290362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194578.866, "dur": 2.162, + "args": { + "External id": 290363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194582.834, "dur": 1.012, + "args": { + "External id": 290364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194585.317, "dur": 1.071, + "args": { + "External id": 290365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194587.548, "dur": 1.125, + "args": { + "External id": 290366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194592.617, "dur": 0.961, + "args": { + "External id": 290367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194595.265, "dur": 2.773, + "args": { + "External id": 290368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194599.586, "dur": 1.079, + "args": { + "External id": 290369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367194602.085, "dur": 1.152, + "args": { + "External id": 290370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367194664.747, "dur": 16937.865, + "args": { + "External id": 290371,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367194683.566, "dur": 16909.042, + "args": { + "External id": 290372,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367194708.591, "dur": 15.278, + "args": { + "External id": 290373,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367194728.216, "dur": 16823.664, + "args": { + "External id": 290374,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367194730.803, "dur": 16819.933, + "args": { + "External id": 290375,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367194737.227, "dur": 6.255, + "args": { + "External id": 290376,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367194745.263, "dur": 16801.346, + "args": { + "External id": 290377,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367211839.773, "dur": 36.240, + "args": { + "External id": 290378,"Sequence number": 1209185, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5910 + } + }, + { + "ph": "s", "id": 47, "pid": 2070547, "tid": 2070547, "ts": 5333367211839.773, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367211860.175, "dur": 11.166, + "args": { + "External id": 290379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367211865.083, "dur": 5.981, + "args": { + "External id": 290380,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367211943.010, "dur": 88.415, + "args": { + "External id": 290381,"Record function id": 0, "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367212033.172, "dur": 1163.746, + "args": { + "External id": 290382,"Record function id": 0, "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367212073.900, "dur": 1089.381, + "args": { + "External id": 290383,"Sequence number": 1209186, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5915 + } + }, + { + "ph": "s", "id": 46, "pid": 2070547, "tid": 2070547, "ts": 5333367212073.900, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367212145.266, "dur": 70.128, + "args": { + "External id": 290384,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367212239.192, "dur": 113.139, + "args": { + "External id": 290385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367212363.562, "dur": 38.264, + "args": { + "External id": 290386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367212408.361, "dur": 30.236, + "args": { + "External id": 290387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367212473.770, "dur": 31.494, + "args": { + "External id": 290388,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367212521.892, "dur": 18.675, + "args": { + "External id": 290389,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367212560.325, "dur": 188.692, + "args": { + "External id": 290390,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367212614.112, "dur": 54.612, + "args": { + "External id": 290391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367212659.943, "dur": 7.796, + "args": { + "External id": 290392,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367212671.880, "dur": 5.255, + "args": { + "External id": 290393,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367212678.515, "dur": 3.398, + "args": { + "External id": 290394,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367212684.444, "dur": 6.199, + "args": { + "External id": 290395,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367212761.312, "dur": 52.813, + "args": { + "External id": 290396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367212846.997, "dur": 30.326, + "args": { + "External id": 290397,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367212886.622, "dur": 41.990, + "args": { + "External id": 290398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367212936.071, "dur": 34.335, + "args": { + "External id": 290399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367212994.879, "dur": 25.477, + "args": { + "External id": 290400,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367213026.104, "dur": 33.010, + "args": { + "External id": 290401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367213076.380, "dur": 20.369, + "args": { + "External id": 290402,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2070547, "tid": 2070547, + "ts": 5333367213279.376, "dur": 90.875, + "args": { + "External id": 290403,"Record function id": 0, "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367213450.004, "dur": 49.050, + "args": { + "External id": 290404,"Record function id": 0, "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2070547, "tid": 2070547, + "ts": 5333367213509.237, "dur": 18365.643, + "args": { + "External id": 290405,"Record function id": 0, "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070547, "tid": 2070547, + "ts": 5333367213517.906, "dur": 964.327, + "args": { + "External id": 290406,"Record function id": 0, "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367213602.672, "dur": 10.602, + "args": { + "External id": 290407,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367213666.778, "dur": 42.843, + "args": { + "External id": 290408,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213672.629, "dur": 2.600, + "args": { + "External id": 290409,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213679.857, "dur": 0.420, + "args": { + "External id": 290410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213681.933, "dur": 0.644, + "args": { + "External id": 290411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213684.268, "dur": 0.718, + "args": { + "External id": 290412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213688.962, "dur": 0.280, + "args": { + "External id": 290413,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213691.177, "dur": 0.460, + "args": { + "External id": 290414,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213693.136, "dur": 4.338, + "args": { + "External id": 290415,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213699.325, "dur": 0.556, + "args": { + "External id": 290416,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213701.601, "dur": 0.146, + "args": { + "External id": 290417,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367213722.024, "dur": 49.968, + "args": { + "External id": 290418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367213807.974, "dur": 123.104, + "args": { + "External id": 290419,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367213821.043, "dur": 5.033, + "args": { + "External id": 290420,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367213831.312, "dur": 10.724, + "args": { + "External id": 290421,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367213836.067, "dur": 5.580, + "args": { + "External id": 290422,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213839.833, "dur": 0.410, + "args": { + "External id": 290423,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367213848.730, "dur": 33.353, + "args": { + "External id": 290424,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213851.548, "dur": 2.488, + "args": { + "External id": 290425,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213855.654, "dur": 0.391, + "args": { + "External id": 290426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213857.629, "dur": 0.199, + "args": { + "External id": 290427,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213861.722, "dur": 1.739, + "args": { + "External id": 290428,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213864.790, "dur": 0.153, + "args": { + "External id": 290429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213866.638, "dur": 0.294, + "args": { + "External id": 290430,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213870.497, "dur": 0.359, + "args": { + "External id": 290431,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213872.534, "dur": 0.169, + "args": { + "External id": 290432,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367213874.178, "dur": 2.526, + "args": { + "External id": 290433,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367213896.405, "dur": 26.449, + "args": { + "External id": 290434,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367213987.687, "dur": 380.617, + "args": { + "External id": 290435,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367214022.905, "dur": 338.922, + "args": { + "External id": 290436,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5968, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367214038.003, "dur": 316.781, + "args": { + "External id": 290437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367214397.962, "dur": 3.264, + "args": { + "External id": 290438,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5970, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070547, "tid": 2070547, + "ts": 5333367214504.603, "dur": 17108.996, + "args": { + "External id": 290439,"Record function id": 0, "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214611.758, "dur": 43.733, + "args": { + "External id": 290440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214662.133, "dur": 1.609, + "args": { + "External id": 290441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214665.713, "dur": 2.518, + "args": { + "External id": 290442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214670.110, "dur": 1.070, + "args": { + "External id": 290443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214672.927, "dur": 0.840, + "args": { + "External id": 290444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214675.373, "dur": 1.230, + "args": { + "External id": 290445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214680.071, "dur": 1.210, + "args": { + "External id": 290446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214682.884, "dur": 2.769, + "args": { + "External id": 290447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214687.472, "dur": 1.397, + "args": { + "External id": 290448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367214690.609, "dur": 0.660, + "args": { + "External id": 290449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367214714.742, "dur": 16843.384, + "args": { + "External id": 290450,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367214732.099, "dur": 16815.583, + "args": { + "External id": 290451,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367214753.249, "dur": 14.768, + "args": { + "External id": 290452,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367214771.784, "dur": 16733.287, + "args": { + "External id": 290453,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367214774.379, "dur": 16729.854, + "args": { + "External id": 290454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367214780.955, "dur": 7.162, + "args": { + "External id": 290455,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367214789.829, "dur": 16710.128, + "args": { + "External id": 290456,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367231807.759, "dur": 39.541, + "args": { + "External id": 290457,"Sequence number": 1209187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5989 + } + }, + { + "ph": "s", "id": 45, "pid": 2070547, "tid": 2070547, "ts": 5333367231807.759, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367231831.084, "dur": 11.250, + "args": { + "External id": 290458,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367231835.723, "dur": 6.157, + "args": { + "External id": 290459,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367231917.202, "dur": 87.505, + "args": { + "External id": 290460,"Record function id": 0, "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367232006.123, "dur": 1151.266, + "args": { + "External id": 290461,"Record function id": 0, "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367232048.664, "dur": 1095.091, + "args": { + "External id": 290462,"Sequence number": 1209188, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5994 + } + }, + { + "ph": "s", "id": 44, "pid": 2070547, "tid": 2070547, "ts": 5333367232048.664, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367232117.553, "dur": 65.754, + "args": { + "External id": 290463,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367232203.842, "dur": 110.467, + "args": { + "External id": 290464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367232326.345, "dur": 40.103, + "args": { + "External id": 290465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367232374.845, "dur": 30.154, + "args": { + "External id": 290466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367232437.008, "dur": 32.610, + "args": { + "External id": 290467,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367232489.849, "dur": 16.786, + "args": { + "External id": 290468,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367232529.070, "dur": 183.434, + "args": { + "External id": 290469,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367232584.451, "dur": 12.391, + "args": { + "External id": 290470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367232590.486, "dur": 5.278, + "args": { + "External id": 290471,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367232599.680, "dur": 5.701, + "args": { + "External id": 290472,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367232606.670, "dur": 0.907, + "args": { + "External id": 290473,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367232610.506, "dur": 5.472, + "args": { + "External id": 290474,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367232726.567, "dur": 56.371, + "args": { + "External id": 290475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367232814.195, "dur": 31.022, + "args": { + "External id": 290476,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367232855.764, "dur": 41.134, + "args": { + "External id": 290477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367232905.203, "dur": 35.401, + "args": { + "External id": 290478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367232962.702, "dur": 26.326, + "args": { + "External id": 290479,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367232994.421, "dur": 33.695, + "args": { + "External id": 290480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367233047.697, "dur": 22.343, + "args": { + "External id": 290481,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6013 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2070547, "tid": 2070547, + "ts": 5333367233256.129, "dur": 96.475, + "args": { + "External id": 290482,"Record function id": 0, "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367233434.912, "dur": 49.184, + "args": { + "External id": 290483,"Record function id": 0, "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2070547, "tid": 2070547, + "ts": 5333367233493.797, "dur": 18233.558, + "args": { + "External id": 290484,"Record function id": 0, "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070547, "tid": 2070547, + "ts": 5333367233500.958, "dur": 930.334, + "args": { + "External id": 290485,"Record function id": 0, "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367233585.488, "dur": 10.359, + "args": { + "External id": 290486,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367233610.369, "dur": 83.654, + "args": { + "External id": 290487,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233616.938, "dur": 39.461, + "args": { + "External id": 290488,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233663.087, "dur": 0.535, + "args": { + "External id": 290489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233665.453, "dur": 0.222, + "args": { + "External id": 290490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233667.622, "dur": 0.582, + "args": { + "External id": 290491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233673.089, "dur": 0.388, + "args": { + "External id": 290492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233675.040, "dur": 0.357, + "args": { + "External id": 290493,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233676.899, "dur": 4.608, + "args": { + "External id": 290494,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233683.250, "dur": 0.374, + "args": { + "External id": 290495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233685.447, "dur": 0.180, + "args": { + "External id": 290496,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367233707.503, "dur": 49.254, + "args": { + "External id": 290497,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367233793.620, "dur": 121.750, + "args": { + "External id": 290498,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367233807.012, "dur": 5.322, + "args": { + "External id": 290499,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367233817.571, "dur": 10.607, + "args": { + "External id": 290500,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367233821.849, "dur": 5.939, + "args": { + "External id": 290501,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233825.778, "dur": 0.412, + "args": { + "External id": 290502,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367233836.016, "dur": 33.460, + "args": { + "External id": 290503,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233838.553, "dur": 2.951, + "args": { + "External id": 290504,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233842.907, "dur": 0.213, + "args": { + "External id": 290505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233844.915, "dur": 0.352, + "args": { + "External id": 290506,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233849.495, "dur": 1.398, + "args": { + "External id": 290507,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233852.368, "dur": 0.144, + "args": { + "External id": 290508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233854.094, "dur": 0.148, + "args": { + "External id": 290509,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233858.020, "dur": 0.155, + "args": { + "External id": 290510,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233859.578, "dur": 0.168, + "args": { + "External id": 290511,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367233861.674, "dur": 2.107, + "args": { + "External id": 290512,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367233880.848, "dur": 26.561, + "args": { + "External id": 290513,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367233970.993, "dur": 350.085, + "args": { + "External id": 290514,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367234005.874, "dur": 309.243, + "args": { + "External id": 290515,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6047, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367234016.969, "dur": 290.251, + "args": { + "External id": 290516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367234350.345, "dur": 2.753, + "args": { + "External id": 290517,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6049, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070547, "tid": 2070547, + "ts": 5333367234452.448, "dur": 17002.032, + "args": { + "External id": 290518,"Record function id": 0, "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234558.711, "dur": 7.031, + "args": { + "External id": 290519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234568.963, "dur": 1.153, + "args": { + "External id": 290520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234571.676, "dur": 2.738, + "args": { + "External id": 290521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234576.194, "dur": 0.985, + "args": { + "External id": 290522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234578.594, "dur": 0.924, + "args": { + "External id": 290523,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234580.941, "dur": 0.889, + "args": { + "External id": 290524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234585.710, "dur": 0.985, + "args": { + "External id": 290525,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234588.339, "dur": 2.276, + "args": { + "External id": 290526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234592.159, "dur": 0.663, + "args": { + "External id": 290527,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367234594.275, "dur": 0.850, + "args": { + "External id": 290528,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367234615.922, "dur": 16778.259, + "args": { + "External id": 290529,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367234670.239, "dur": 16713.422, + "args": { + "External id": 290530,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367234694.646, "dur": 14.972, + "args": { + "External id": 290531,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367234713.397, "dur": 16628.688, + "args": { + "External id": 290532,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367234715.998, "dur": 16625.179, + "args": { + "External id": 290533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367234722.691, "dur": 5.158, + "args": { + "External id": 290534,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367234729.670, "dur": 16607.015, + "args": { + "External id": 290535,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367251608.027, "dur": 88.475, + "args": { + "External id": 290536,"Sequence number": 1209189, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6068 + } + }, + { + "ph": "s", "id": 43, "pid": 2070547, "tid": 2070547, "ts": 5333367251608.027, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367251679.452, "dur": 11.735, + "args": { + "External id": 290537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367251684.239, "dur": 6.555, + "args": { + "External id": 290538,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367251769.682, "dur": 86.806, + "args": { + "External id": 290539,"Record function id": 0, "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367251858.567, "dur": 1478.852, + "args": { + "External id": 290540,"Record function id": 0, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367251902.797, "dur": 1390.567, + "args": { + "External id": 290541,"Sequence number": 1209190, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6073 + } + }, + { + "ph": "s", "id": 42, "pid": 2070547, "tid": 2070547, "ts": 5333367251902.797, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367251974.886, "dur": 47.273, + "args": { + "External id": 290542,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367252035.124, "dur": 104.937, + "args": { + "External id": 290543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367252152.455, "dur": 225.212, + "args": { + "External id": 290544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367252390.594, "dur": 36.366, + "args": { + "External id": 290545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367252459.480, "dur": 28.111, + "args": { + "External id": 290546,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367252504.931, "dur": 17.002, + "args": { + "External id": 290547,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367252544.435, "dur": 184.665, + "args": { + "External id": 290548,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367252599.850, "dur": 12.409, + "args": { + "External id": 290549,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367252605.727, "dur": 5.673, + "args": { + "External id": 290550,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367252614.982, "dur": 42.993, + "args": { + "External id": 290551,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367252661.118, "dur": 1.420, + "args": { + "External id": 290552,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367252667.030, "dur": 4.022, + "args": { + "External id": 290553,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367252741.117, "dur": 54.658, + "args": { + "External id": 290554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367252827.684, "dur": 30.804, + "args": { + "External id": 290555,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367252867.637, "dur": 43.070, + "args": { + "External id": 290556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367252916.787, "dur": 35.626, + "args": { + "External id": 290557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367252976.492, "dur": 36.580, + "args": { + "External id": 290558,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367253021.694, "dur": 78.678, + "args": { + "External id": 290559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367253139.532, "dur": 24.344, + "args": { + "External id": 290560,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2070547, "tid": 2070547, + "ts": 5333367253412.271, "dur": 82.273, + "args": { + "External id": 290561,"Record function id": 0, "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367253571.753, "dur": 90.297, + "args": { + "External id": 290562,"Record function id": 0, "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2070547, "tid": 2070547, + "ts": 5333367253674.676, "dur": 16428.138, + "args": { + "External id": 290563,"Record function id": 0, "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070547, "tid": 2070547, + "ts": 5333367253683.065, "dur": 1090.462, + "args": { + "External id": 290564,"Record function id": 0, "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367253768.066, "dur": 11.360, + "args": { + "External id": 290565,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367253793.868, "dur": 43.111, + "args": { + "External id": 290566,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253800.197, "dur": 2.511, + "args": { + "External id": 290567,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253807.665, "dur": 0.284, + "args": { + "External id": 290568,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253809.616, "dur": 0.246, + "args": { + "External id": 290569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253811.823, "dur": 0.749, + "args": { + "External id": 290570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253816.088, "dur": 0.208, + "args": { + "External id": 290571,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253818.021, "dur": 0.540, + "args": { + "External id": 290572,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253819.962, "dur": 3.820, + "args": { + "External id": 290573,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253825.806, "dur": 0.187, + "args": { + "External id": 290574,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253827.848, "dur": 0.412, + "args": { + "External id": 290575,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367253849.462, "dur": 51.466, + "args": { + "External id": 290576,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367253935.067, "dur": 127.179, + "args": { + "External id": 290577,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367253947.375, "dur": 3.926, + "args": { + "External id": 290578,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367253955.919, "dur": 11.153, + "args": { + "External id": 290579,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367253960.871, "dur": 5.807, + "args": { + "External id": 290580,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253964.654, "dur": 0.639, + "args": { + "External id": 290581,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367253973.880, "dur": 40.218, + "args": { + "External id": 290582,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253976.524, "dur": 2.670, + "args": { + "External id": 290583,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253980.946, "dur": 0.315, + "args": { + "External id": 290584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253988.756, "dur": 0.244, + "args": { + "External id": 290585,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253993.101, "dur": 1.808, + "args": { + "External id": 290586,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253996.349, "dur": 0.159, + "args": { + "External id": 290587,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367253998.102, "dur": 2.062, + "args": { + "External id": 290588,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367254001.455, "dur": 0.626, + "args": { + "External id": 290589,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367254004.134, "dur": 0.386, + "args": { + "External id": 290590,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367254008.541, "dur": 0.402, + "args": { + "External id": 290591,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367254027.467, "dur": 25.870, + "args": { + "External id": 290592,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367254117.090, "dur": 535.425, + "args": { + "External id": 290593,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367254153.948, "dur": 459.842, + "args": { + "External id": 290594,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6126, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367254212.801, "dur": 393.983, + "args": { + "External id": 290595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367254683.575, "dur": 4.030, + "args": { + "External id": 290596,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6128, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070547, "tid": 2070547, + "ts": 5333367254797.015, "dur": 15091.175, + "args": { + "External id": 290597,"Record function id": 0, "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254905.090, "dur": 7.208, + "args": { + "External id": 290598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254915.723, "dur": 1.288, + "args": { + "External id": 290599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254918.894, "dur": 2.113, + "args": { + "External id": 290600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254922.626, "dur": 1.137, + "args": { + "External id": 290601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254924.932, "dur": 0.897, + "args": { + "External id": 290602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254927.014, "dur": 1.005, + "args": { + "External id": 290603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254938.089, "dur": 1.111, + "args": { + "External id": 290604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254940.947, "dur": 3.086, + "args": { + "External id": 290605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254945.693, "dur": 0.797, + "args": { + "External id": 290606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367254947.974, "dur": 1.179, + "args": { + "External id": 290607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367254972.806, "dur": 14858.705, + "args": { + "External id": 290608,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367254991.452, "dur": 14829.214, + "args": { + "External id": 290609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367255009.322, "dur": 16.100, + "args": { + "External id": 290610,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367255029.186, "dur": 14749.044, + "args": { + "External id": 290611,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367255031.952, "dur": 14745.386, + "args": { + "External id": 290612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367255038.181, "dur": 6.825, + "args": { + "External id": 290613,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367255046.697, "dur": 14726.587, + "args": { + "External id": 290614,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367270039.573, "dur": 38.748, + "args": { + "External id": 290615,"Sequence number": 1209191, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6147 + } + }, + { + "ph": "s", "id": 41, "pid": 2070547, "tid": 2070547, "ts": 5333367270039.573, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367270062.309, "dur": 11.151, + "args": { + "External id": 290616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367270067.572, "dur": 5.706, + "args": { + "External id": 290617,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367270143.095, "dur": 113.832, + "args": { + "External id": 290618,"Record function id": 0, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367270260.707, "dur": 1203.581, + "args": { + "External id": 290619,"Record function id": 0, "Ev Idx": 6151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367270321.627, "dur": 1126.500, + "args": { + "External id": 290620,"Sequence number": 1209192, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6152 + } + }, + { + "ph": "s", "id": 40, "pid": 2070547, "tid": 2070547, "ts": 5333367270321.627, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367270401.490, "dur": 53.533, + "args": { + "External id": 290621,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367270469.140, "dur": 104.458, + "args": { + "External id": 290622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367270583.148, "dur": 79.007, + "args": { + "External id": 290623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367270674.532, "dur": 36.358, + "args": { + "External id": 290624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367270740.887, "dur": 28.678, + "args": { + "External id": 290625,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367270787.910, "dur": 15.455, + "args": { + "External id": 290626,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367270824.680, "dur": 135.986, + "args": { + "External id": 290627,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367270878.711, "dur": 12.138, + "args": { + "External id": 290628,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367270885.076, "dur": 4.917, + "args": { + "External id": 290629,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367270893.977, "dur": 5.760, + "args": { + "External id": 290630,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367270901.066, "dur": 1.162, + "args": { + "External id": 290631,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367270904.633, "dur": 5.357, + "args": { + "External id": 290632,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367270972.398, "dur": 46.317, + "args": { + "External id": 290633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367271049.556, "dur": 28.456, + "args": { + "External id": 290634,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367271087.948, "dur": 39.700, + "args": { + "External id": 290635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367271135.864, "dur": 53.766, + "args": { + "External id": 290636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367271222.930, "dur": 36.534, + "args": { + "External id": 290637,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367271268.467, "dur": 50.184, + "args": { + "External id": 290638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367271345.156, "dur": 22.883, + "args": { + "External id": 290639,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2070547, "tid": 2070547, + "ts": 5333367271533.792, "dur": 80.068, + "args": { + "External id": 290640,"Record function id": 0, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367271738.423, "dur": 50.122, + "args": { + "External id": 290641,"Record function id": 0, "Ev Idx": 6173 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2070547, "tid": 2070547, + "ts": 5333367271798.388, "dur": 18481.937, + "args": { + "External id": 290642,"Record function id": 0, "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070547, "tid": 2070547, + "ts": 5333367271807.518, "dur": 898.672, + "args": { + "External id": 290643,"Record function id": 0, "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367271891.730, "dur": 9.927, + "args": { + "External id": 290644,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367271915.191, "dur": 42.202, + "args": { + "External id": 290645,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271920.689, "dur": 2.393, + "args": { + "External id": 290646,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271927.684, "dur": 0.656, + "args": { + "External id": 290647,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271930.102, "dur": 0.481, + "args": { + "External id": 290648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271931.995, "dur": 0.660, + "args": { + "External id": 290649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271936.375, "dur": 0.519, + "args": { + "External id": 290650,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271938.546, "dur": 0.214, + "args": { + "External id": 290651,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271940.093, "dur": 4.464, + "args": { + "External id": 290652,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271946.201, "dur": 0.520, + "args": { + "External id": 290653,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367271948.542, "dur": 0.326, + "args": { + "External id": 290654,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367271968.936, "dur": 46.886, + "args": { + "External id": 290655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367272049.247, "dur": 134.486, + "args": { + "External id": 290656,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367272061.131, "dur": 3.876, + "args": { + "External id": 290657,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367272070.136, "dur": 10.914, + "args": { + "External id": 290658,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367272074.899, "dur": 5.766, + "args": { + "External id": 290659,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272078.958, "dur": 0.569, + "args": { + "External id": 290660,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367272088.651, "dur": 32.749, + "args": { + "External id": 290661,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272091.678, "dur": 2.730, + "args": { + "External id": 290662,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272096.220, "dur": 0.394, + "args": { + "External id": 290663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272098.047, "dur": 0.364, + "args": { + "External id": 290664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272102.111, "dur": 1.783, + "args": { + "External id": 290665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272105.356, "dur": 0.335, + "args": { + "External id": 290666,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272107.161, "dur": 0.190, + "args": { + "External id": 290667,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272110.671, "dur": 0.573, + "args": { + "External id": 290668,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272112.715, "dur": 0.189, + "args": { + "External id": 290669,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272114.538, "dur": 1.580, + "args": { + "External id": 290670,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367272132.954, "dur": 25.914, + "args": { + "External id": 290671,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367272254.543, "dur": 314.964, + "args": { + "External id": 290672,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367272293.980, "dur": 270.787, + "args": { + "External id": 290673,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6205, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367272306.164, "dur": 253.592, + "args": { + "External id": 290674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367272590.855, "dur": 2.442, + "args": { + "External id": 290675,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6207, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070547, "tid": 2070547, + "ts": 5333367272730.351, "dur": 17301.595, + "args": { + "External id": 290676,"Record function id": 0, "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272837.662, "dur": 7.243, + "args": { + "External id": 290677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272848.600, "dur": 1.065, + "args": { + "External id": 290678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272851.435, "dur": 2.346, + "args": { + "External id": 290679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272855.608, "dur": 1.336, + "args": { + "External id": 290680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272858.308, "dur": 0.697, + "args": { + "External id": 290681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272860.369, "dur": 0.896, + "args": { + "External id": 290682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272865.205, "dur": 1.027, + "args": { + "External id": 290683,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272868.393, "dur": 2.379, + "args": { + "External id": 290684,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272872.164, "dur": 0.769, + "args": { + "External id": 290685,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367272874.528, "dur": 0.671, + "args": { + "External id": 290686,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367272906.105, "dur": 17067.716, + "args": { + "External id": 290687,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367272923.520, "dur": 17040.022, + "args": { + "External id": 290688,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367272941.478, "dur": 13.478, + "args": { + "External id": 290689,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367272958.942, "dur": 16963.151, + "args": { + "External id": 290690,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367272961.497, "dur": 16959.511, + "args": { + "External id": 290691,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367272968.048, "dur": 5.519, + "args": { + "External id": 290692,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367272975.047, "dur": 16941.595, + "args": { + "External id": 290693,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367290199.949, "dur": 44.740, + "args": { + "External id": 290694,"Sequence number": 1209193, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6226 + } + }, + { + "ph": "s", "id": 39, "pid": 2070547, "tid": 2070547, "ts": 5333367290199.949, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367290225.347, "dur": 13.279, + "args": { + "External id": 290695,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367290231.227, "dur": 6.881, + "args": { + "External id": 290696,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367290326.224, "dur": 85.728, + "args": { + "External id": 290697,"Record function id": 0, "Ev Idx": 6229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367290413.219, "dur": 1162.203, + "args": { + "External id": 290698,"Record function id": 0, "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367290458.045, "dur": 1102.111, + "args": { + "External id": 290699,"Sequence number": 1209194, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6231 + } + }, + { + "ph": "s", "id": 38, "pid": 2070547, "tid": 2070547, "ts": 5333367290458.045, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367290530.074, "dur": 50.932, + "args": { + "External id": 290700,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367290593.761, "dur": 137.466, + "args": { + "External id": 290701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367290747.246, "dur": 40.601, + "args": { + "External id": 290702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367290796.992, "dur": 31.180, + "args": { + "External id": 290703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367290856.527, "dur": 29.521, + "args": { + "External id": 290704,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367290904.884, "dur": 17.673, + "args": { + "External id": 290705,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367290942.824, "dur": 137.265, + "args": { + "External id": 290706,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367290997.883, "dur": 12.199, + "args": { + "External id": 290707,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367291003.744, "dur": 5.498, + "args": { + "External id": 290708,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367291012.951, "dur": 5.597, + "args": { + "External id": 290709,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367291019.999, "dur": 1.194, + "args": { + "External id": 290710,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367291023.680, "dur": 3.956, + "args": { + "External id": 290711,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367291091.417, "dur": 44.164, + "args": { + "External id": 290712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367291164.422, "dur": 57.113, + "args": { + "External id": 290713,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367291236.061, "dur": 61.412, + "args": { + "External id": 290714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367291309.027, "dur": 40.932, + "args": { + "External id": 290715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367291377.153, "dur": 26.947, + "args": { + "External id": 290716,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367291409.800, "dur": 34.828, + "args": { + "External id": 290717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367291465.156, "dur": 20.739, + "args": { + "External id": 290718,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2070547, "tid": 2070547, + "ts": 5333367291686.956, "dur": 78.538, + "args": { + "External id": 290719,"Record function id": 0, "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367291844.231, "dur": 45.369, + "args": { + "External id": 290720,"Record function id": 0, "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2070547, "tid": 2070547, + "ts": 5333367291898.950, "dur": 18588.874, + "args": { + "External id": 290721,"Record function id": 0, "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070547, "tid": 2070547, + "ts": 5333367291906.895, "dur": 924.413, + "args": { + "External id": 290722,"Record function id": 0, "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367291992.265, "dur": 10.038, + "args": { + "External id": 290723,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367292016.345, "dur": 39.289, + "args": { + "External id": 290724,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292021.669, "dur": 2.188, + "args": { + "External id": 290725,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292028.828, "dur": 0.739, + "args": { + "External id": 290726,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292030.943, "dur": 0.443, + "args": { + "External id": 290727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292033.453, "dur": 0.528, + "args": { + "External id": 290728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292037.172, "dur": 0.413, + "args": { + "External id": 290729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292039.123, "dur": 0.422, + "args": { + "External id": 290730,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292041.152, "dur": 2.868, + "args": { + "External id": 290731,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292045.838, "dur": 0.169, + "args": { + "External id": 290732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292047.817, "dur": 0.373, + "args": { + "External id": 290733,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367292067.582, "dur": 46.845, + "args": { + "External id": 290734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367292147.895, "dur": 171.430, + "args": { + "External id": 290735,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367292158.403, "dur": 3.341, + "args": { + "External id": 290736,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367292185.610, "dur": 14.747, + "args": { + "External id": 290737,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367292191.027, "dur": 8.759, + "args": { + "External id": 290738,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292196.177, "dur": 1.379, + "args": { + "External id": 290739,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367292211.741, "dur": 43.015, + "args": { + "External id": 290740,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292215.132, "dur": 2.913, + "args": { + "External id": 290741,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292220.507, "dur": 0.336, + "args": { + "External id": 290742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292223.792, "dur": 0.292, + "args": { + "External id": 290743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292228.934, "dur": 1.781, + "args": { + "External id": 290744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292232.816, "dur": 0.296, + "args": { + "External id": 290745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292235.810, "dur": 0.276, + "args": { + "External id": 290746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292240.765, "dur": 0.289, + "args": { + "External id": 290747,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292243.618, "dur": 0.456, + "args": { + "External id": 290748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367292246.288, "dur": 1.873, + "args": { + "External id": 290749,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367292273.757, "dur": 36.562, + "args": { + "External id": 290750,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367292377.670, "dur": 353.225, + "args": { + "External id": 290751,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367292411.596, "dur": 314.111, + "args": { + "External id": 290752,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6284, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367292422.578, "dur": 297.023, + "args": { + "External id": 290753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367292756.352, "dur": 2.626, + "args": { + "External id": 290754,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6286, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070547, "tid": 2070547, + "ts": 5333367292853.226, "dur": 17420.625, + "args": { + "External id": 290755,"Record function id": 0, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292953.412, "dur": 6.714, + "args": { + "External id": 290756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292963.432, "dur": 1.274, + "args": { + "External id": 290757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292979.932, "dur": 2.687, + "args": { + "External id": 290758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292986.324, "dur": 0.836, + "args": { + "External id": 290759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292988.975, "dur": 1.039, + "args": { + "External id": 290760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292991.255, "dur": 0.914, + "args": { + "External id": 290761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292995.372, "dur": 0.930, + "args": { + "External id": 290762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367292999.502, "dur": 1.740, + "args": { + "External id": 290763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367293002.743, "dur": 0.793, + "args": { + "External id": 290764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367293005.228, "dur": 0.623, + "args": { + "External id": 290765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367293025.779, "dur": 17183.314, + "args": { + "External id": 290766,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367293044.213, "dur": 17151.808, + "args": { + "External id": 290767,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367293061.037, "dur": 15.247, + "args": { + "External id": 290768,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367293082.496, "dur": 17055.129, + "args": { + "External id": 290769,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367293084.883, "dur": 17051.584, + "args": { + "External id": 290770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367293091.689, "dur": 5.519, + "args": { + "External id": 290771,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367293098.955, "dur": 17032.834, + "args": { + "External id": 290772,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367310425.302, "dur": 37.039, + "args": { + "External id": 290773,"Sequence number": 1209195, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6305 + } + }, + { + "ph": "s", "id": 37, "pid": 2070547, "tid": 2070547, "ts": 5333367310425.302, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367310446.100, "dur": 11.473, + "args": { + "External id": 290774,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367310451.105, "dur": 6.107, + "args": { + "External id": 290775,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367310528.856, "dur": 86.528, + "args": { + "External id": 290776,"Record function id": 0, "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367310617.399, "dur": 1216.249, + "args": { + "External id": 290777,"Record function id": 0, "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367310695.960, "dur": 1122.450, + "args": { + "External id": 290778,"Sequence number": 1209196, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6310 + } + }, + { + "ph": "s", "id": 36, "pid": 2070547, "tid": 2070547, "ts": 5333367310695.960, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367310770.102, "dur": 50.421, + "args": { + "External id": 290779,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367310834.955, "dur": 106.407, + "args": { + "External id": 290780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367310950.406, "dur": 38.335, + "args": { + "External id": 290781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367310998.010, "dur": 30.703, + "args": { + "External id": 290782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367311052.152, "dur": 28.650, + "args": { + "External id": 290783,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367311101.301, "dur": 14.789, + "args": { + "External id": 290784,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367311136.124, "dur": 187.205, + "args": { + "External id": 290785,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367311209.212, "dur": 21.392, + "args": { + "External id": 290786,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367311216.784, "dur": 7.846, + "args": { + "External id": 290787,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367311234.493, "dur": 6.729, + "args": { + "External id": 290788,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367311243.339, "dur": 1.492, + "args": { + "External id": 290789,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367311248.434, "dur": 5.121, + "args": { + "External id": 290790,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367311336.274, "dur": 60.611, + "args": { + "External id": 290791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367311428.976, "dur": 32.027, + "args": { + "External id": 290792,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367311470.226, "dur": 40.581, + "args": { + "External id": 290793,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367311519.546, "dur": 34.827, + "args": { + "External id": 290794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367311577.919, "dur": 25.385, + "args": { + "External id": 290795,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367311609.207, "dur": 79.105, + "args": { + "External id": 290796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367311716.723, "dur": 23.259, + "args": { + "External id": 290797,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2070547, "tid": 2070547, + "ts": 5333367311901.644, "dur": 76.388, + "args": { + "External id": 290798,"Record function id": 0, "Ev Idx": 6330 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367312054.962, "dur": 49.110, + "args": { + "External id": 290799,"Record function id": 0, "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2070547, "tid": 2070547, + "ts": 5333367312113.642, "dur": 18399.145, + "args": { + "External id": 290800,"Record function id": 0, "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070547, "tid": 2070547, + "ts": 5333367312121.764, "dur": 928.805, + "args": { + "External id": 290801,"Record function id": 0, "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367312232.750, "dur": 11.235, + "args": { + "External id": 290802,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367312260.462, "dur": 47.993, + "args": { + "External id": 290803,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312267.893, "dur": 2.695, + "args": { + "External id": 290804,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312276.599, "dur": 0.539, + "args": { + "External id": 290805,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312279.299, "dur": 0.742, + "args": { + "External id": 290806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312282.458, "dur": 1.082, + "args": { + "External id": 290807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312288.652, "dur": 0.820, + "args": { + "External id": 290808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312290.959, "dur": 0.503, + "args": { + "External id": 290809,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312292.808, "dur": 3.563, + "args": { + "External id": 290810,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312298.352, "dur": 0.379, + "args": { + "External id": 290811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312300.442, "dur": 0.329, + "args": { + "External id": 290812,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367312320.466, "dur": 48.473, + "args": { + "External id": 290813,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367312405.164, "dur": 146.195, + "args": { + "External id": 290814,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367312416.938, "dur": 4.791, + "args": { + "External id": 290815,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367312426.468, "dur": 36.860, + "args": { + "External id": 290816,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367312456.682, "dur": 6.202, + "args": { + "External id": 290817,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312461.041, "dur": 0.485, + "args": { + "External id": 290818,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367312470.641, "dur": 32.822, + "args": { + "External id": 290819,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312472.954, "dur": 2.538, + "args": { + "External id": 290820,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312477.547, "dur": 0.508, + "args": { + "External id": 290821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312479.500, "dur": 0.373, + "args": { + "External id": 290822,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312483.760, "dur": 1.952, + "args": { + "External id": 290823,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312487.158, "dur": 0.160, + "args": { + "External id": 290824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312488.778, "dur": 0.176, + "args": { + "External id": 290825,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312492.387, "dur": 0.160, + "args": { + "External id": 290826,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312494.243, "dur": 0.349, + "args": { + "External id": 290827,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367312496.349, "dur": 2.149, + "args": { + "External id": 290828,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367312516.309, "dur": 26.301, + "args": { + "External id": 290829,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367312608.293, "dur": 346.895, + "args": { + "External id": 290830,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367312681.540, "dur": 268.640, + "args": { + "External id": 290831,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6363, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367312697.054, "dur": 247.662, + "args": { + "External id": 290832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367312977.441, "dur": 2.061, + "args": { + "External id": 290833,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6365, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070547, "tid": 2070547, + "ts": 5333367313072.628, "dur": 17218.906, + "args": { + "External id": 290834,"Record function id": 0, "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313191.356, "dur": 7.431, + "args": { + "External id": 290835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313204.366, "dur": 1.338, + "args": { + "External id": 290836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313208.216, "dur": 3.194, + "args": { + "External id": 290837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313214.134, "dur": 1.081, + "args": { + "External id": 290838,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313217.644, "dur": 1.101, + "args": { + "External id": 290839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313220.933, "dur": 1.300, + "args": { + "External id": 290840,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313227.172, "dur": 0.991, + "args": { + "External id": 290841,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313231.345, "dur": 2.511, + "args": { + "External id": 290842,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313236.377, "dur": 0.890, + "args": { + "External id": 290843,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367313239.627, "dur": 1.069, + "args": { + "External id": 290844,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367313268.614, "dur": 16957.102, + "args": { + "External id": 290845,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367313288.992, "dur": 16924.516, + "args": { + "External id": 290846,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367313313.034, "dur": 17.704, + "args": { + "External id": 290847,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367313335.781, "dur": 16819.016, + "args": { + "External id": 290848,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367313338.948, "dur": 16815.092, + "args": { + "External id": 290849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367313346.816, "dur": 6.673, + "args": { + "External id": 290850,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367313355.607, "dur": 16794.450, + "args": { + "External id": 290851,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367330447.331, "dur": 39.057, + "args": { + "External id": 290852,"Sequence number": 1209197, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6384 + } + }, + { + "ph": "s", "id": 35, "pid": 2070547, "tid": 2070547, "ts": 5333367330447.331, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367330470.030, "dur": 11.357, + "args": { + "External id": 290853,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367330474.772, "dur": 6.304, + "args": { + "External id": 290854,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367330554.661, "dur": 124.203, + "args": { + "External id": 290855,"Record function id": 0, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367330681.949, "dur": 1166.198, + "args": { + "External id": 290856,"Record function id": 0, "Ev Idx": 6388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367330728.439, "dur": 1104.365, + "args": { + "External id": 290857,"Sequence number": 1209198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6389 + } + }, + { + "ph": "s", "id": 34, "pid": 2070547, "tid": 2070547, "ts": 5333367330728.439, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367330801.252, "dur": 49.442, + "args": { + "External id": 290858,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367330863.758, "dur": 105.874, + "args": { + "External id": 290859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367330979.630, "dur": 38.538, + "args": { + "External id": 290860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367331026.881, "dur": 30.992, + "args": { + "External id": 290861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367331084.456, "dur": 26.276, + "args": { + "External id": 290862,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367331130.659, "dur": 16.631, + "args": { + "External id": 290863,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367331183.424, "dur": 161.325, + "args": { + "External id": 290864,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367331245.162, "dur": 16.435, + "args": { + "External id": 290865,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367331252.669, "dur": 7.702, + "args": { + "External id": 290866,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367331265.230, "dur": 7.176, + "args": { + "External id": 290867,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367331274.723, "dur": 1.484, + "args": { + "External id": 290868,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367331279.569, "dur": 4.621, + "args": { + "External id": 290869,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367331358.167, "dur": 58.440, + "args": { + "External id": 290870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367331450.226, "dur": 29.806, + "args": { + "External id": 290871,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367331488.599, "dur": 42.660, + "args": { + "External id": 290872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367331539.979, "dur": 34.819, + "args": { + "External id": 290873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367331599.223, "dur": 64.393, + "args": { + "External id": 290874,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367331672.041, "dur": 39.026, + "args": { + "External id": 290875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367331734.020, "dur": 21.851, + "args": { + "External id": 290876,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2070547, "tid": 2070547, + "ts": 5333367331915.681, "dur": 75.467, + "args": { + "External id": 290877,"Record function id": 0, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367332065.074, "dur": 46.892, + "args": { + "External id": 290878,"Record function id": 0, "Ev Idx": 6410 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2070547, "tid": 2070547, + "ts": 5333367332121.514, "dur": 18292.122, + "args": { + "External id": 290879,"Record function id": 0, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070547, "tid": 2070547, + "ts": 5333367332130.530, "dur": 910.781, + "args": { + "External id": 290880,"Record function id": 0, "Ev Idx": 6412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367332246.126, "dur": 12.067, + "args": { + "External id": 290881,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367332276.797, "dur": 46.981, + "args": { + "External id": 290882,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332284.918, "dur": 2.654, + "args": { + "External id": 290883,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332293.302, "dur": 0.399, + "args": { + "External id": 290884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332296.213, "dur": 0.468, + "args": { + "External id": 290885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332299.633, "dur": 0.681, + "args": { + "External id": 290886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332304.099, "dur": 0.724, + "args": { + "External id": 290887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332306.258, "dur": 0.665, + "args": { + "External id": 290888,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332308.392, "dur": 3.887, + "args": { + "External id": 290889,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332314.072, "dur": 0.391, + "args": { + "External id": 290890,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332316.028, "dur": 0.487, + "args": { + "External id": 290891,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367332336.810, "dur": 48.075, + "args": { + "External id": 290892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367332421.189, "dur": 125.965, + "args": { + "External id": 290893,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367332432.952, "dur": 5.119, + "args": { + "External id": 290894,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367332443.193, "dur": 11.785, + "args": { + "External id": 290895,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367332448.160, "dur": 6.424, + "args": { + "External id": 290896,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332452.699, "dur": 0.586, + "args": { + "External id": 290897,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367332462.709, "dur": 34.634, + "args": { + "External id": 290898,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332465.153, "dur": 2.554, + "args": { + "External id": 290899,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332469.789, "dur": 0.575, + "args": { + "External id": 290900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332471.994, "dur": 0.209, + "args": { + "External id": 290901,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332476.565, "dur": 2.605, + "args": { + "External id": 290902,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332480.890, "dur": 0.675, + "args": { + "External id": 290903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332483.244, "dur": 0.556, + "args": { + "External id": 290904,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332487.564, "dur": 0.198, + "args": { + "External id": 290905,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332489.367, "dur": 0.336, + "args": { + "External id": 290906,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367332490.996, "dur": 2.124, + "args": { + "External id": 290907,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367332513.317, "dur": 25.776, + "args": { + "External id": 290908,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367332603.087, "dur": 343.509, + "args": { + "External id": 290909,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367332674.130, "dur": 267.230, + "args": { + "External id": 290910,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6442, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367332687.072, "dur": 249.282, + "args": { + "External id": 290911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367332969.869, "dur": 2.316, + "args": { + "External id": 290912,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6444, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070547, "tid": 2070547, + "ts": 5333367333062.489, "dur": 17096.848, + "args": { + "External id": 290913,"Record function id": 0, "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333160.360, "dur": 24.074, + "args": { + "External id": 290914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333192.503, "dur": 1.904, + "args": { + "External id": 290915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333197.241, "dur": 2.746, + "args": { + "External id": 290916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333202.739, "dur": 1.277, + "args": { + "External id": 290917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333206.474, "dur": 1.429, + "args": { + "External id": 290918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333210.058, "dur": 0.974, + "args": { + "External id": 290919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333215.836, "dur": 1.315, + "args": { + "External id": 290920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333219.749, "dur": 2.298, + "args": { + "External id": 290921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333224.767, "dur": 1.377, + "args": { + "External id": 290922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367333228.745, "dur": 1.199, + "args": { + "External id": 290923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367333256.659, "dur": 16844.744, + "args": { + "External id": 290924,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367333277.438, "dur": 16813.121, + "args": { + "External id": 290925,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367333306.944, "dur": 19.010, + "args": { + "External id": 290926,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367333330.861, "dur": 16717.991, + "args": { + "External id": 290927,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367333334.141, "dur": 16713.923, + "args": { + "External id": 290928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367333342.330, "dur": 7.776, + "args": { + "External id": 290929,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367333352.068, "dur": 16691.905, + "args": { + "External id": 290930,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367350344.274, "dur": 40.510, + "args": { + "External id": 290931,"Sequence number": 1209199, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6463 + } + }, + { + "ph": "s", "id": 33, "pid": 2070547, "tid": 2070547, "ts": 5333367350344.274, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367350368.278, "dur": 11.833, + "args": { + "External id": 290932,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367350373.273, "dur": 6.381, + "args": { + "External id": 290933,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367350457.935, "dur": 84.332, + "args": { + "External id": 290934,"Record function id": 0, "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367350544.005, "dur": 1201.994, + "args": { + "External id": 290935,"Record function id": 0, "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367350588.109, "dur": 1141.662, + "args": { + "External id": 290936,"Sequence number": 1209200, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6468 + } + }, + { + "ph": "s", "id": 32, "pid": 2070547, "tid": 2070547, "ts": 5333367350588.109, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367350698.971, "dur": 52.505, + "args": { + "External id": 290937,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367350765.794, "dur": 107.279, + "args": { + "External id": 290938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367350883.213, "dur": 42.128, + "args": { + "External id": 290939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367350933.986, "dur": 31.807, + "args": { + "External id": 290940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367350992.118, "dur": 28.339, + "args": { + "External id": 290941,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367351038.496, "dur": 15.634, + "args": { + "External id": 290942,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367351075.647, "dur": 165.345, + "args": { + "External id": 290943,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367351128.002, "dur": 11.694, + "args": { + "External id": 290944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367351133.450, "dur": 5.466, + "args": { + "External id": 290945,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367351143.125, "dur": 5.044, + "args": { + "External id": 290946,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367351149.833, "dur": 1.054, + "args": { + "External id": 290947,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367351153.756, "dur": 3.639, + "args": { + "External id": 290948,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367351257.363, "dur": 63.014, + "args": { + "External id": 290949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367351356.735, "dur": 33.736, + "args": { + "External id": 290950,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367351399.204, "dur": 41.966, + "args": { + "External id": 290951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367351449.793, "dur": 34.987, + "args": { + "External id": 290952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367351508.161, "dur": 25.680, + "args": { + "External id": 290953,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367351539.496, "dur": 34.572, + "args": { + "External id": 290954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367351595.063, "dur": 18.383, + "args": { + "External id": 290955,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2070547, "tid": 2070547, + "ts": 5333367351816.032, "dur": 77.419, + "args": { + "External id": 290956,"Record function id": 0, "Ev Idx": 6488 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367351969.783, "dur": 48.658, + "args": { + "External id": 290957,"Record function id": 0, "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2070547, "tid": 2070547, + "ts": 5333367352027.412, "dur": 18298.361, + "args": { + "External id": 290958,"Record function id": 0, "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070547, "tid": 2070547, + "ts": 5333367352034.857, "dur": 912.174, + "args": { + "External id": 290959,"Record function id": 0, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367352120.032, "dur": 9.546, + "args": { + "External id": 290960,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367352145.894, "dur": 63.679, + "args": { + "External id": 290961,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352151.679, "dur": 2.361, + "args": { + "External id": 290962,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352158.575, "dur": 0.227, + "args": { + "External id": 290963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352160.408, "dur": 0.538, + "args": { + "External id": 290964,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352162.444, "dur": 0.657, + "args": { + "External id": 290965,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352183.518, "dur": 1.152, + "args": { + "External id": 290966,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352188.441, "dur": 0.879, + "args": { + "External id": 290967,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352191.611, "dur": 4.201, + "args": { + "External id": 290968,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352198.331, "dur": 0.282, + "args": { + "External id": 290969,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352200.769, "dur": 0.837, + "args": { + "External id": 290970,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367352226.506, "dur": 53.410, + "args": { + "External id": 290971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367352319.569, "dur": 126.801, + "args": { + "External id": 290972,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367352331.299, "dur": 6.288, + "args": { + "External id": 290973,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367352342.790, "dur": 10.966, + "args": { + "External id": 290974,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367352347.339, "dur": 5.995, + "args": { + "External id": 290975,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352351.278, "dur": 0.738, + "args": { + "External id": 290976,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367352361.200, "dur": 36.991, + "args": { + "External id": 290977,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352368.255, "dur": 2.563, + "args": { + "External id": 290978,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352372.384, "dur": 0.444, + "args": { + "External id": 290979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352374.549, "dur": 0.499, + "args": { + "External id": 290980,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352378.382, "dur": 1.885, + "args": { + "External id": 290981,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352381.616, "dur": 0.370, + "args": { + "External id": 290982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352383.412, "dur": 0.578, + "args": { + "External id": 290983,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352386.699, "dur": 0.531, + "args": { + "External id": 290984,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352388.562, "dur": 0.343, + "args": { + "External id": 290985,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367352390.860, "dur": 2.254, + "args": { + "External id": 290986,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367352412.882, "dur": 25.066, + "args": { + "External id": 290987,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367352503.307, "dur": 345.136, + "args": { + "External id": 290988,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367352534.932, "dur": 308.406, + "args": { + "External id": 290989,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6521, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367352546.502, "dur": 288.401, + "args": { + "External id": 290990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367352872.746, "dur": 2.543, + "args": { + "External id": 290991,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6523, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070547, "tid": 2070547, + "ts": 5333367352969.069, "dur": 17110.759, + "args": { + "External id": 290992,"Record function id": 0, "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353068.257, "dur": 6.475, + "args": { + "External id": 290993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353078.078, "dur": 1.182, + "args": { + "External id": 290994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353081.334, "dur": 2.674, + "args": { + "External id": 290995,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353085.655, "dur": 1.218, + "args": { + "External id": 290996,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353088.447, "dur": 0.980, + "args": { + "External id": 290997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353090.913, "dur": 0.761, + "args": { + "External id": 290998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353095.543, "dur": 1.043, + "args": { + "External id": 290999,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353098.254, "dur": 1.958, + "args": { + "External id": 291000,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353102.077, "dur": 0.654, + "args": { + "External id": 291001,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367353104.479, "dur": 0.407, + "args": { + "External id": 291002,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367353125.167, "dur": 16899.555, + "args": { + "External id": 291003,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367353141.213, "dur": 16872.861, + "args": { + "External id": 291004,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367353161.469, "dur": 36.471, + "args": { + "External id": 291005,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367353203.864, "dur": 16770.723, + "args": { + "External id": 291006,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367353206.909, "dur": 16766.951, + "args": { + "External id": 291007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367353214.802, "dur": 8.666, + "args": { + "External id": 291008,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367353225.891, "dur": 16744.227, + "args": { + "External id": 291009,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367370258.058, "dur": 38.060, + "args": { + "External id": 291010,"Sequence number": 1209201, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6542 + } + }, + { + "ph": "s", "id": 31, "pid": 2070547, "tid": 2070547, "ts": 5333367370258.058, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367370279.597, "dur": 11.252, + "args": { + "External id": 291011,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367370284.482, "dur": 6.037, + "args": { + "External id": 291012,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367370367.393, "dur": 84.368, + "args": { + "External id": 291013,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367370453.248, "dur": 1129.962, + "args": { + "External id": 291014,"Record function id": 0, "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367370498.601, "dur": 1069.627, + "args": { + "External id": 291015,"Sequence number": 1209202, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6547 + } + }, + { + "ph": "s", "id": 30, "pid": 2070547, "tid": 2070547, "ts": 5333367370498.601, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367370570.550, "dur": 86.478, + "args": { + "External id": 291016,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367370675.003, "dur": 95.612, + "args": { + "External id": 291017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367370781.419, "dur": 37.444, + "args": { + "External id": 291018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367370827.387, "dur": 31.188, + "args": { + "External id": 291019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367370886.347, "dur": 28.129, + "args": { + "External id": 291020,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367370939.312, "dur": 17.062, + "args": { + "External id": 291021,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367370976.990, "dur": 133.148, + "args": { + "External id": 291022,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367371028.449, "dur": 12.270, + "args": { + "External id": 291023,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367371034.185, "dur": 5.661, + "args": { + "External id": 291024,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367371043.714, "dur": 5.148, + "args": { + "External id": 291025,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367371050.395, "dur": 1.252, + "args": { + "External id": 291026,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367371054.152, "dur": 3.923, + "args": { + "External id": 291027,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367371120.303, "dur": 44.272, + "args": { + "External id": 291028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367371222.093, "dur": 42.103, + "args": { + "External id": 291029,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367371275.436, "dur": 48.771, + "args": { + "External id": 291030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367371332.887, "dur": 34.226, + "args": { + "External id": 291031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367371390.533, "dur": 29.844, + "args": { + "External id": 291032,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367371425.866, "dur": 33.267, + "args": { + "External id": 291033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367371478.872, "dur": 17.960, + "args": { + "External id": 291034,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2070547, "tid": 2070547, + "ts": 5333367371692.602, "dur": 79.527, + "args": { + "External id": 291035,"Record function id": 0, "Ev Idx": 6567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367371847.566, "dur": 46.357, + "args": { + "External id": 291036,"Record function id": 0, "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2070547, "tid": 2070547, + "ts": 5333367371903.494, "dur": 18236.584, + "args": { + "External id": 291037,"Record function id": 0, "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070547, "tid": 2070547, + "ts": 5333367371912.625, "dur": 953.068, + "args": { + "External id": 291038,"Record function id": 0, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367371993.594, "dur": 9.819, + "args": { + "External id": 291039,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367372018.077, "dur": 39.452, + "args": { + "External id": 291040,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372023.794, "dur": 2.317, + "args": { + "External id": 291041,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372031.083, "dur": 0.519, + "args": { + "External id": 291042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372033.290, "dur": 0.668, + "args": { + "External id": 291043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372035.635, "dur": 0.466, + "args": { + "External id": 291044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372039.067, "dur": 0.520, + "args": { + "External id": 291045,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372041.333, "dur": 0.210, + "args": { + "External id": 291046,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372042.816, "dur": 3.909, + "args": { + "External id": 291047,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372048.459, "dur": 0.447, + "args": { + "External id": 291048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372050.318, "dur": 0.339, + "args": { + "External id": 291049,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367372069.524, "dur": 42.330, + "args": { + "External id": 291050,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367372144.569, "dur": 158.313, + "args": { + "External id": 291051,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367372155.799, "dur": 4.349, + "args": { + "External id": 291052,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367372164.819, "dur": 31.385, + "args": { + "External id": 291053,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367372187.320, "dur": 8.272, + "args": { + "External id": 291054,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372192.133, "dur": 1.144, + "args": { + "External id": 291055,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367372207.591, "dur": 40.159, + "args": { + "External id": 291056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372210.925, "dur": 2.900, + "args": { + "External id": 291057,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372216.022, "dur": 0.346, + "args": { + "External id": 291058,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372218.498, "dur": 0.529, + "args": { + "External id": 291059,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372223.301, "dur": 2.140, + "args": { + "External id": 291060,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372227.155, "dur": 0.538, + "args": { + "External id": 291061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372230.179, "dur": 0.547, + "args": { + "External id": 291062,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372235.818, "dur": 0.593, + "args": { + "External id": 291063,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372238.117, "dur": 0.817, + "args": { + "External id": 291064,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367372240.411, "dur": 2.385, + "args": { + "External id": 291065,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367372265.387, "dur": 28.933, + "args": { + "External id": 291066,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367372360.868, "dur": 398.615, + "args": { + "External id": 291067,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367372393.531, "dur": 360.276, + "args": { + "External id": 291068,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6600, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367372403.886, "dur": 342.948, + "args": { + "External id": 291069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367372785.764, "dur": 2.954, + "args": { + "External id": 291070,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6602, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070547, "tid": 2070547, + "ts": 5333367372888.184, "dur": 17035.599, + "args": { + "External id": 291071,"Record function id": 0, "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367372994.796, "dur": 6.919, + "args": { + "External id": 291072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373005.483, "dur": 1.074, + "args": { + "External id": 291073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373008.826, "dur": 2.497, + "args": { + "External id": 291074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373012.903, "dur": 0.823, + "args": { + "External id": 291075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373015.207, "dur": 1.071, + "args": { + "External id": 291076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373017.511, "dur": 0.982, + "args": { + "External id": 291077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373022.505, "dur": 0.902, + "args": { + "External id": 291078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373024.898, "dur": 2.462, + "args": { + "External id": 291079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373028.656, "dur": 1.026, + "args": { + "External id": 291080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367373031.089, "dur": 0.597, + "args": { + "External id": 291081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367373053.720, "dur": 16810.734, + "args": { + "External id": 291082,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367373069.864, "dur": 16782.896, + "args": { + "External id": 291083,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367373093.931, "dur": 15.781, + "args": { + "External id": 291084,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367373113.541, "dur": 16696.183, + "args": { + "External id": 291085,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367373116.026, "dur": 16692.784, + "args": { + "External id": 291086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367373121.658, "dur": 6.644, + "args": { + "External id": 291087,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367373130.047, "dur": 16674.013, + "args": { + "External id": 291088,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367390076.336, "dur": 37.195, + "args": { + "External id": 291089,"Sequence number": 1209203, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6621 + } + }, + { + "ph": "s", "id": 29, "pid": 2070547, "tid": 2070547, "ts": 5333367390076.336, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367390096.654, "dur": 11.430, + "args": { + "External id": 291090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367390101.565, "dur": 6.245, + "args": { + "External id": 291091,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367390203.605, "dur": 98.400, + "args": { + "External id": 291092,"Record function id": 0, "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367390304.658, "dur": 1170.920, + "args": { + "External id": 291093,"Record function id": 0, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367390357.362, "dur": 1102.772, + "args": { + "External id": 291094,"Sequence number": 1209204, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6626 + } + }, + { + "ph": "s", "id": 28, "pid": 2070547, "tid": 2070547, "ts": 5333367390357.362, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367390433.903, "dur": 51.677, + "args": { + "External id": 291095,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367390498.557, "dur": 109.578, + "args": { + "External id": 291096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367390659.504, "dur": 46.749, + "args": { + "External id": 291097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367390714.891, "dur": 30.802, + "args": { + "External id": 291098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367390775.777, "dur": 29.276, + "args": { + "External id": 291099,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367390821.098, "dur": 15.400, + "args": { + "External id": 291100,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367390857.228, "dur": 138.604, + "args": { + "External id": 291101,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367390912.192, "dur": 12.692, + "args": { + "External id": 291102,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367390918.480, "dur": 5.574, + "args": { + "External id": 291103,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367390927.703, "dur": 5.028, + "args": { + "External id": 291104,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367390934.564, "dur": 1.104, + "args": { + "External id": 291105,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367390940.706, "dur": 3.343, + "args": { + "External id": 291106,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367391006.838, "dur": 47.224, + "args": { + "External id": 291107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367391082.792, "dur": 27.772, + "args": { + "External id": 291108,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367391119.175, "dur": 41.500, + "args": { + "External id": 291109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367391183.129, "dur": 53.418, + "args": { + "External id": 291110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367391270.012, "dur": 31.441, + "args": { + "External id": 291111,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367391308.659, "dur": 35.906, + "args": { + "External id": 291112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367391362.479, "dur": 23.257, + "args": { + "External id": 291113,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2070547, "tid": 2070547, + "ts": 5333367391544.618, "dur": 113.598, + "args": { + "External id": 291114,"Record function id": 0, "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367391741.112, "dur": 50.245, + "args": { + "External id": 291115,"Record function id": 0, "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2070547, "tid": 2070547, + "ts": 5333367391800.893, "dur": 18079.824, + "args": { + "External id": 291116,"Record function id": 0, "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070547, "tid": 2070547, + "ts": 5333367391809.039, "dur": 853.974, + "args": { + "External id": 291117,"Record function id": 0, "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367391892.003, "dur": 10.090, + "args": { + "External id": 291118,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367391916.759, "dur": 39.012, + "args": { + "External id": 291119,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391922.625, "dur": 2.842, + "args": { + "External id": 291120,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391930.465, "dur": 0.205, + "args": { + "External id": 291121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391932.094, "dur": 0.446, + "args": { + "External id": 291122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391933.947, "dur": 0.436, + "args": { + "External id": 291123,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391938.071, "dur": 0.432, + "args": { + "External id": 291124,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391939.471, "dur": 0.381, + "args": { + "External id": 291125,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391941.576, "dur": 4.490, + "args": { + "External id": 291126,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391947.377, "dur": 0.190, + "args": { + "External id": 291127,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367391948.786, "dur": 0.150, + "args": { + "External id": 291128,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367391971.734, "dur": 44.372, + "args": { + "External id": 291129,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367392049.836, "dur": 110.300, + "args": { + "External id": 291130,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367392060.286, "dur": 6.036, + "args": { + "External id": 291131,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367392071.344, "dur": 10.893, + "args": { + "External id": 291132,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367392076.348, "dur": 5.493, + "args": { + "External id": 291133,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392079.847, "dur": 0.678, + "args": { + "External id": 291134,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367392089.213, "dur": 27.635, + "args": { + "External id": 291135,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392091.430, "dur": 0.616, + "args": { + "External id": 291136,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392093.447, "dur": 2.546, + "args": { + "External id": 291137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392097.372, "dur": 0.386, + "args": { + "External id": 291138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392098.885, "dur": 1.495, + "args": { + "External id": 291139,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392104.234, "dur": 0.211, + "args": { + "External id": 291140,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392105.461, "dur": 0.149, + "args": { + "External id": 291141,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392106.839, "dur": 0.334, + "args": { + "External id": 291142,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392110.097, "dur": 0.320, + "args": { + "External id": 291143,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392111.708, "dur": 0.536, + "args": { + "External id": 291144,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367392129.507, "dur": 22.071, + "args": { + "External id": 291145,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367392233.743, "dur": 300.186, + "args": { + "External id": 291146,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367392264.649, "dur": 264.983, + "args": { + "External id": 291147,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6679, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367392275.453, "dur": 248.875, + "args": { + "External id": 291148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367392554.975, "dur": 2.587, + "args": { + "External id": 291149,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6681, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070547, "tid": 2070547, + "ts": 5333367392686.755, "dur": 16979.531, + "args": { + "External id": 291150,"Record function id": 0, "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392789.021, "dur": 6.611, + "args": { + "External id": 291151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392799.155, "dur": 1.200, + "args": { + "External id": 291152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392802.374, "dur": 2.701, + "args": { + "External id": 291153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392806.944, "dur": 0.787, + "args": { + "External id": 291154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392809.356, "dur": 1.116, + "args": { + "External id": 291155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392812.056, "dur": 1.466, + "args": { + "External id": 291156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392817.164, "dur": 1.271, + "args": { + "External id": 291157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392820.439, "dur": 2.227, + "args": { + "External id": 291158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392824.608, "dur": 0.683, + "args": { + "External id": 291159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367392827.210, "dur": 0.583, + "args": { + "External id": 291160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367392848.508, "dur": 16728.975, + "args": { + "External id": 291161,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367392864.776, "dur": 16702.873, + "args": { + "External id": 291162,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367392886.368, "dur": 15.899, + "args": { + "External id": 291163,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367392906.121, "dur": 16622.995, + "args": { + "External id": 291164,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367392908.584, "dur": 16619.612, + "args": { + "External id": 291165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367392914.746, "dur": 6.528, + "args": { + "External id": 291166,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367392922.921, "dur": 16601.302, + "args": { + "External id": 291167,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367409818.223, "dur": 36.955, + "args": { + "External id": 291168,"Sequence number": 1209205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6700 + } + }, + { + "ph": "s", "id": 27, "pid": 2070547, "tid": 2070547, "ts": 5333367409818.223, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367409838.974, "dur": 10.732, + "args": { + "External id": 291169,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367409843.493, "dur": 5.926, + "args": { + "External id": 291170,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367409921.367, "dur": 81.993, + "args": { + "External id": 291171,"Record function id": 0, "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367410005.105, "dur": 1122.670, + "args": { + "External id": 291172,"Record function id": 0, "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367410049.001, "dur": 1064.411, + "args": { + "External id": 291173,"Sequence number": 1209206, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6705 + } + }, + { + "ph": "s", "id": 26, "pid": 2070547, "tid": 2070547, "ts": 5333367410049.001, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367410116.085, "dur": 47.092, + "args": { + "External id": 291174,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367410199.782, "dur": 106.328, + "args": { + "External id": 291175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367410317.814, "dur": 38.205, + "args": { + "External id": 291176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367410364.754, "dur": 30.490, + "args": { + "External id": 291177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367410423.439, "dur": 31.286, + "args": { + "External id": 291178,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367410472.004, "dur": 16.602, + "args": { + "External id": 291179,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367410509.001, "dur": 174.571, + "args": { + "External id": 291180,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367410562.047, "dur": 12.587, + "args": { + "External id": 291181,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367410568.058, "dur": 5.790, + "args": { + "External id": 291182,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367410577.263, "dur": 5.599, + "args": { + "External id": 291183,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367410584.220, "dur": 1.123, + "args": { + "External id": 291184,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367410587.780, "dur": 3.794, + "args": { + "External id": 291185,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367410697.344, "dur": 55.784, + "args": { + "External id": 291186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367410784.724, "dur": 31.012, + "args": { + "External id": 291187,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367410824.603, "dur": 41.750, + "args": { + "External id": 291188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367410875.104, "dur": 34.213, + "args": { + "External id": 291189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367410931.895, "dur": 27.676, + "args": { + "External id": 291190,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367410965.252, "dur": 33.388, + "args": { + "External id": 291191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367411018.539, "dur": 18.379, + "args": { + "External id": 291192,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2070547, "tid": 2070547, + "ts": 5333367411216.769, "dur": 82.959, + "args": { + "External id": 291193,"Record function id": 0, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367411378.708, "dur": 46.004, + "args": { + "External id": 291194,"Record function id": 0, "Ev Idx": 6726 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2070547, "tid": 2070547, + "ts": 5333367411433.918, "dur": 18459.253, + "args": { + "External id": 291195,"Record function id": 0, "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070547, "tid": 2070547, + "ts": 5333367411441.848, "dur": 887.952, + "args": { + "External id": 291196,"Record function id": 0, "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367411523.190, "dur": 10.462, + "args": { + "External id": 291197,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367411548.277, "dur": 37.281, + "args": { + "External id": 291198,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411554.073, "dur": 2.417, + "args": { + "External id": 291199,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411561.404, "dur": 0.706, + "args": { + "External id": 291200,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411563.308, "dur": 0.260, + "args": { + "External id": 291201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411564.979, "dur": 0.480, + "args": { + "External id": 291202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411568.484, "dur": 0.298, + "args": { + "External id": 291203,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411570.009, "dur": 0.575, + "args": { + "External id": 291204,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411571.512, "dur": 3.733, + "args": { + "External id": 291205,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411576.512, "dur": 0.606, + "args": { + "External id": 291206,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411578.677, "dur": 0.394, + "args": { + "External id": 291207,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367411596.615, "dur": 81.756, + "args": { + "External id": 291208,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367411716.321, "dur": 124.873, + "args": { + "External id": 291209,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367411727.701, "dur": 5.215, + "args": { + "External id": 291210,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367411737.964, "dur": 10.880, + "args": { + "External id": 291211,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367411742.607, "dur": 5.845, + "args": { + "External id": 291212,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411746.368, "dur": 0.526, + "args": { + "External id": 291213,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367411756.428, "dur": 31.974, + "args": { + "External id": 291214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411758.827, "dur": 2.940, + "args": { + "External id": 291215,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411763.379, "dur": 0.443, + "args": { + "External id": 291216,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411765.030, "dur": 0.468, + "args": { + "External id": 291217,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411769.120, "dur": 1.542, + "args": { + "External id": 291218,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411771.957, "dur": 0.520, + "args": { + "External id": 291219,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411773.655, "dur": 0.203, + "args": { + "External id": 291220,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411777.803, "dur": 0.376, + "args": { + "External id": 291221,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411779.149, "dur": 0.650, + "args": { + "External id": 291222,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367411781.489, "dur": 2.420, + "args": { + "External id": 291223,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367411805.834, "dur": 27.611, + "args": { + "External id": 291224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367411897.630, "dur": 327.639, + "args": { + "External id": 291225,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367411929.533, "dur": 289.981, + "args": { + "External id": 291226,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6758, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367411939.685, "dur": 272.052, + "args": { + "External id": 291227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367412251.193, "dur": 2.585, + "args": { + "External id": 291228,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6760, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070547, "tid": 2070547, + "ts": 5333367412350.525, "dur": 17323.927, + "args": { + "External id": 291229,"Record function id": 0, "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412453.542, "dur": 6.410, + "args": { + "External id": 291230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412463.224, "dur": 0.984, + "args": { + "External id": 291231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412465.798, "dur": 2.209, + "args": { + "External id": 291232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412469.649, "dur": 0.775, + "args": { + "External id": 291233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412471.729, "dur": 0.854, + "args": { + "External id": 291234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412473.871, "dur": 0.858, + "args": { + "External id": 291235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412492.542, "dur": 1.124, + "args": { + "External id": 291236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412495.800, "dur": 2.014, + "args": { + "External id": 291237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412499.297, "dur": 0.763, + "args": { + "External id": 291238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367412501.371, "dur": 0.381, + "args": { + "External id": 291239,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367412524.358, "dur": 17073.409, + "args": { + "External id": 291240,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367412540.176, "dur": 17047.184, + "args": { + "External id": 291241,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367412555.967, "dur": 16.281, + "args": { + "External id": 291242,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367412576.296, "dur": 16972.001, + "args": { + "External id": 291243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367412578.715, "dur": 16968.892, + "args": { + "External id": 291244,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367412585.337, "dur": 6.398, + "args": { + "External id": 291245,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367412593.634, "dur": 16950.322, + "args": { + "External id": 291246,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367429826.958, "dur": 39.489, + "args": { + "External id": 291247,"Sequence number": 1209207, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6779 + } + }, + { + "ph": "s", "id": 25, "pid": 2070547, "tid": 2070547, "ts": 5333367429826.958, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367429850.360, "dur": 10.538, + "args": { + "External id": 291248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367429854.824, "dur": 5.839, + "args": { + "External id": 291249,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367429932.884, "dur": 83.847, + "args": { + "External id": 291250,"Record function id": 0, "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367430018.267, "dur": 1143.766, + "args": { + "External id": 291251,"Record function id": 0, "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367430062.820, "dur": 1084.477, + "args": { + "External id": 291252,"Sequence number": 1209208, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6784 + } + }, + { + "ph": "s", "id": 24, "pid": 2070547, "tid": 2070547, "ts": 5333367430062.820, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367430133.468, "dur": 71.890, + "args": { + "External id": 291253,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367430224.928, "dur": 108.545, + "args": { + "External id": 291254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367430344.089, "dur": 41.615, + "args": { + "External id": 291255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367430394.188, "dur": 30.911, + "args": { + "External id": 291256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367430458.850, "dur": 32.819, + "args": { + "External id": 291257,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367430508.846, "dur": 16.441, + "args": { + "External id": 291258,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367430547.163, "dur": 181.359, + "args": { + "External id": 291259,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367430602.561, "dur": 12.232, + "args": { + "External id": 291260,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367430608.250, "dur": 5.741, + "args": { + "External id": 291261,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367430617.333, "dur": 41.977, + "args": { + "External id": 291262,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367430662.251, "dur": 1.277, + "args": { + "External id": 291263,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367430666.493, "dur": 5.526, + "args": { + "External id": 291264,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367430740.540, "dur": 53.767, + "args": { + "External id": 291265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367430825.130, "dur": 30.543, + "args": { + "External id": 291266,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367430865.040, "dur": 41.421, + "args": { + "External id": 291267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367430916.076, "dur": 35.845, + "args": { + "External id": 291268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367430977.327, "dur": 26.180, + "args": { + "External id": 291269,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367431009.243, "dur": 33.226, + "args": { + "External id": 291270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367431059.698, "dur": 21.604, + "args": { + "External id": 291271,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2070547, "tid": 2070547, + "ts": 5333367431260.559, "dur": 80.334, + "args": { + "External id": 291272,"Record function id": 0, "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367431418.497, "dur": 49.204, + "args": { + "External id": 291273,"Record function id": 0, "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2070547, "tid": 2070547, + "ts": 5333367431477.121, "dur": 18081.024, + "args": { + "External id": 291274,"Record function id": 0, "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070547, "tid": 2070547, + "ts": 5333367431485.993, "dur": 875.550, + "args": { + "External id": 291275,"Record function id": 0, "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367431568.135, "dur": 10.037, + "args": { + "External id": 291276,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367431591.537, "dur": 74.779, + "args": { + "External id": 291277,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431597.461, "dur": 2.446, + "args": { + "External id": 291278,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431604.965, "dur": 0.258, + "args": { + "External id": 291279,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431606.526, "dur": 0.643, + "args": { + "External id": 291280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431608.150, "dur": 0.472, + "args": { + "External id": 291281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431612.227, "dur": 0.382, + "args": { + "External id": 291282,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431613.613, "dur": 0.377, + "args": { + "External id": 291283,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431615.377, "dur": 39.294, + "args": { + "External id": 291284,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431657.780, "dur": 0.384, + "args": { + "External id": 291285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431659.556, "dur": 0.381, + "args": { + "External id": 291286,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367431678.909, "dur": 45.923, + "args": { + "External id": 291287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367431760.907, "dur": 117.768, + "args": { + "External id": 291288,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367431772.554, "dur": 5.528, + "args": { + "External id": 291289,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367431783.588, "dur": 10.530, + "args": { + "External id": 291290,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367431788.112, "dur": 5.612, + "args": { + "External id": 291291,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431791.784, "dur": 0.679, + "args": { + "External id": 291292,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367431801.287, "dur": 29.258, + "args": { + "External id": 291293,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431803.507, "dur": 2.693, + "args": { + "External id": 291294,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431807.619, "dur": 0.548, + "args": { + "External id": 291295,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431809.180, "dur": 0.343, + "args": { + "External id": 291296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431813.348, "dur": 1.363, + "args": { + "External id": 291297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431816.133, "dur": 0.159, + "args": { + "External id": 291298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431817.206, "dur": 0.155, + "args": { + "External id": 291299,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431820.758, "dur": 0.322, + "args": { + "External id": 291300,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431822.165, "dur": 0.320, + "args": { + "External id": 291301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367431823.903, "dur": 2.591, + "args": { + "External id": 291302,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367431844.510, "dur": 25.049, + "args": { + "External id": 291303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367431933.625, "dur": 327.672, + "args": { + "External id": 291304,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367431964.466, "dur": 291.744, + "args": { + "External id": 291305,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6837, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367431974.911, "dur": 274.868, + "args": { + "External id": 291306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367432287.125, "dur": 2.451, + "args": { + "External id": 291307,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6839, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070547, "tid": 2070547, + "ts": 5333367432383.198, "dur": 16962.041, + "args": { + "External id": 291308,"Record function id": 0, "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432485.008, "dur": 6.595, + "args": { + "External id": 291309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432495.139, "dur": 0.749, + "args": { + "External id": 291310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432497.594, "dur": 2.103, + "args": { + "External id": 291311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432501.108, "dur": 0.838, + "args": { + "External id": 291312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432503.348, "dur": 0.920, + "args": { + "External id": 291313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432505.654, "dur": 0.815, + "args": { + "External id": 291314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432510.259, "dur": 0.631, + "args": { + "External id": 291315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432512.518, "dur": 1.990, + "args": { + "External id": 291316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432515.761, "dur": 0.576, + "args": { + "External id": 291317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367432517.615, "dur": 0.588, + "args": { + "External id": 291318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367432538.077, "dur": 16753.593, + "args": { + "External id": 291319,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367432553.254, "dur": 16727.806, + "args": { + "External id": 291320,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367432576.272, "dur": 15.393, + "args": { + "External id": 291321,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367432595.697, "dur": 16638.841, + "args": { + "External id": 291322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367432598.202, "dur": 16635.408, + "args": { + "External id": 291323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367432604.387, "dur": 5.645, + "args": { + "External id": 291324,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367432611.945, "dur": 16617.366, + "args": { + "External id": 291325,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367449494.081, "dur": 38.203, + "args": { + "External id": 291326,"Sequence number": 1209209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6858 + } + }, + { + "ph": "s", "id": 23, "pid": 2070547, "tid": 2070547, "ts": 5333367449494.081, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367449515.681, "dur": 11.167, + "args": { + "External id": 291327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367449520.673, "dur": 5.890, + "args": { + "External id": 291328,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367449660.653, "dur": 91.866, + "args": { + "External id": 291329,"Record function id": 0, "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367449755.394, "dur": 1158.527, + "args": { + "External id": 291330,"Record function id": 0, "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367449801.809, "dur": 1097.286, + "args": { + "External id": 291331,"Sequence number": 1209210, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6863 + } + }, + { + "ph": "s", "id": 22, "pid": 2070547, "tid": 2070547, "ts": 5333367449801.809, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367449875.068, "dur": 50.816, + "args": { + "External id": 291332,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367449938.987, "dur": 104.703, + "args": { + "External id": 291333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367450056.128, "dur": 38.128, + "args": { + "External id": 291334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367450100.436, "dur": 30.433, + "args": { + "External id": 291335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367450157.626, "dur": 52.884, + "args": { + "External id": 291336,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367450235.821, "dur": 23.060, + "args": { + "External id": 291337,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367450280.693, "dur": 140.709, + "args": { + "External id": 291338,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367450336.827, "dur": 13.972, + "args": { + "External id": 291339,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367450342.558, "dur": 7.269, + "args": { + "External id": 291340,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367450353.439, "dur": 3.741, + "args": { + "External id": 291341,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367450358.667, "dur": 1.097, + "args": { + "External id": 291342,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367450364.141, "dur": 3.911, + "args": { + "External id": 291343,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367450432.457, "dur": 57.222, + "args": { + "External id": 291344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367450519.122, "dur": 30.458, + "args": { + "External id": 291345,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367450558.233, "dur": 40.797, + "args": { + "External id": 291346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367450605.065, "dur": 73.471, + "args": { + "External id": 291347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367450707.323, "dur": 32.982, + "args": { + "External id": 291348,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367450746.931, "dur": 36.752, + "args": { + "External id": 291349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367450802.574, "dur": 24.300, + "args": { + "External id": 291350,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2070547, "tid": 2070547, + "ts": 5333367450979.683, "dur": 73.612, + "args": { + "External id": 291351,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367451127.168, "dur": 64.920, + "args": { + "External id": 291352,"Record function id": 0, "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2070547, "tid": 2070547, + "ts": 5333367451206.331, "dur": 18189.869, + "args": { + "External id": 291353,"Record function id": 0, "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070547, "tid": 2070547, + "ts": 5333367451217.548, "dur": 842.287, + "args": { + "External id": 291354,"Record function id": 0, "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367451305.266, "dur": 11.824, + "args": { + "External id": 291355,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367451330.861, "dur": 34.896, + "args": { + "External id": 291356,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451335.951, "dur": 2.588, + "args": { + "External id": 291357,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451343.198, "dur": 0.243, + "args": { + "External id": 291358,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451344.626, "dur": 0.360, + "args": { + "External id": 291359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451346.046, "dur": 0.395, + "args": { + "External id": 291360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451349.402, "dur": 0.336, + "args": { + "External id": 291361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451350.847, "dur": 0.336, + "args": { + "External id": 291362,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451352.063, "dur": 3.044, + "args": { + "External id": 291363,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451356.055, "dur": 1.839, + "args": { + "External id": 291364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451359.004, "dur": 0.486, + "args": { + "External id": 291365,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367451376.983, "dur": 46.166, + "args": { + "External id": 291366,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367451457.484, "dur": 113.562, + "args": { + "External id": 291367,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367451468.547, "dur": 4.408, + "args": { + "External id": 291368,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367451478.040, "dur": 10.450, + "args": { + "External id": 291369,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367451482.799, "dur": 5.279, + "args": { + "External id": 291370,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451486.225, "dur": 0.576, + "args": { + "External id": 291371,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367451494.742, "dur": 25.810, + "args": { + "External id": 291372,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451496.444, "dur": 2.645, + "args": { + "External id": 291373,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451500.191, "dur": 0.213, + "args": { + "External id": 291374,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451501.130, "dur": 0.494, + "args": { + "External id": 291375,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451504.043, "dur": 0.360, + "args": { + "External id": 291376,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451505.038, "dur": 1.428, + "args": { + "External id": 291377,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451507.530, "dur": 0.574, + "args": { + "External id": 291378,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451510.863, "dur": 0.454, + "args": { + "External id": 291379,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451512.069, "dur": 0.354, + "args": { + "External id": 291380,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367451513.354, "dur": 2.595, + "args": { + "External id": 291381,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367451535.482, "dur": 27.729, + "args": { + "External id": 291382,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367451663.793, "dur": 299.647, + "args": { + "External id": 291383,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367451694.673, "dur": 264.488, + "args": { + "External id": 291384,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6916, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367451706.434, "dur": 247.234, + "args": { + "External id": 291385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367451985.003, "dur": 2.664, + "args": { + "External id": 291386,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6918, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070547, "tid": 2070547, + "ts": 5333367452079.947, "dur": 17069.151, + "args": { + "External id": 291387,"Record function id": 0, "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452204.338, "dur": 7.559, + "args": { + "External id": 291388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452216.585, "dur": 1.350, + "args": { + "External id": 291389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452220.448, "dur": 1.240, + "args": { + "External id": 291390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452224.078, "dur": 2.892, + "args": { + "External id": 291391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452229.119, "dur": 0.931, + "args": { + "External id": 291392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452231.523, "dur": 0.585, + "args": { + "External id": 291393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452236.179, "dur": 0.922, + "args": { + "External id": 291394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452238.870, "dur": 2.500, + "args": { + "External id": 291395,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452242.372, "dur": 0.849, + "args": { + "External id": 291396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367452244.485, "dur": 0.635, + "args": { + "External id": 291397,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367452266.207, "dur": 16828.432, + "args": { + "External id": 291398,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367452282.888, "dur": 16802.166, + "args": { + "External id": 291399,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367452302.934, "dur": 15.201, + "args": { + "External id": 291400,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367452322.028, "dur": 16723.025, + "args": { + "External id": 291401,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367452324.276, "dur": 16719.887, + "args": { + "External id": 291402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367452330.008, "dur": 7.514, + "args": { + "External id": 291403,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367452339.080, "dur": 16701.158, + "args": { + "External id": 291404,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367469330.111, "dur": 37.605, + "args": { + "External id": 291405,"Sequence number": 1209211, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6937 + } + }, + { + "ph": "s", "id": 21, "pid": 2070547, "tid": 2070547, "ts": 5333367469330.111, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367469350.883, "dur": 11.462, + "args": { + "External id": 291406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367469355.564, "dur": 6.374, + "args": { + "External id": 291407,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367469438.466, "dur": 84.420, + "args": { + "External id": 291408,"Record function id": 0, "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367469524.223, "dur": 1181.728, + "args": { + "External id": 291409,"Record function id": 0, "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367469566.832, "dur": 1122.873, + "args": { + "External id": 291410,"Sequence number": 1209212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6942 + } + }, + { + "ph": "s", "id": 20, "pid": 2070547, "tid": 2070547, "ts": 5333367469566.832, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367469671.238, "dur": 51.808, + "args": { + "External id": 291411,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367469739.025, "dur": 105.212, + "args": { + "External id": 291412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367469854.130, "dur": 38.067, + "args": { + "External id": 291413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367469901.245, "dur": 30.693, + "args": { + "External id": 291414,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367469958.905, "dur": 29.870, + "args": { + "External id": 291415,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367470006.554, "dur": 15.788, + "args": { + "External id": 291416,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367470046.582, "dur": 152.765, + "args": { + "External id": 291417,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367470102.545, "dur": 11.409, + "args": { + "External id": 291418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367470107.760, "dur": 5.465, + "args": { + "External id": 291419,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367470116.850, "dur": 3.760, + "args": { + "External id": 291420,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367470121.656, "dur": 0.929, + "args": { + "External id": 291421,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367470125.078, "dur": 2.596, + "args": { + "External id": 291422,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367470215.462, "dur": 74.040, + "args": { + "External id": 291423,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367470324.945, "dur": 32.155, + "args": { + "External id": 291424,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367470364.554, "dur": 41.524, + "args": { + "External id": 291425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367470414.360, "dur": 34.349, + "args": { + "External id": 291426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367470470.709, "dur": 28.887, + "args": { + "External id": 291427,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367470505.272, "dur": 35.068, + "args": { + "External id": 291428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367470562.329, "dur": 19.332, + "args": { + "External id": 291429,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2070547, "tid": 2070547, + "ts": 5333367470775.921, "dur": 76.383, + "args": { + "External id": 291430,"Record function id": 0, "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367470928.593, "dur": 47.311, + "args": { + "External id": 291431,"Record function id": 0, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.26)", "pid": 2070547, "tid": 2070547, + "ts": 5333367470984.865, "dur": 18196.831, + "args": { + "External id": 291432,"Record function id": 0, "Ev Idx": 6964 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070547, "tid": 2070547, + "ts": 5333367470993.471, "dur": 958.174, + "args": { + "External id": 291433,"Record function id": 0, "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367471079.969, "dur": 9.259, + "args": { + "External id": 291434,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367471103.033, "dur": 33.085, + "args": { + "External id": 291435,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471107.934, "dur": 2.301, + "args": { + "External id": 291436,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471114.734, "dur": 0.666, + "args": { + "External id": 291437,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471116.562, "dur": 0.445, + "args": { + "External id": 291438,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471117.850, "dur": 0.341, + "args": { + "External id": 291439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471121.046, "dur": 0.887, + "args": { + "External id": 291440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471122.926, "dur": 0.419, + "args": { + "External id": 291441,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471124.229, "dur": 3.064, + "args": { + "External id": 291442,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471128.318, "dur": 0.191, + "args": { + "External id": 291443,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471129.349, "dur": 0.165, + "args": { + "External id": 291444,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367471147.752, "dur": 68.253, + "args": { + "External id": 291445,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367471263.049, "dur": 124.814, + "args": { + "External id": 291446,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367471276.305, "dur": 6.125, + "args": { + "External id": 291447,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367471287.762, "dur": 10.953, + "args": { + "External id": 291448,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367471292.377, "dur": 5.911, + "args": { + "External id": 291449,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471295.865, "dur": 0.767, + "args": { + "External id": 291450,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367471310.759, "dur": 27.858, + "args": { + "External id": 291451,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471312.961, "dur": 2.558, + "args": { + "External id": 291452,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471316.882, "dur": 0.364, + "args": { + "External id": 291453,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471317.941, "dur": 0.337, + "args": { + "External id": 291454,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471321.069, "dur": 1.949, + "args": { + "External id": 291455,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471323.627, "dur": 0.310, + "args": { + "External id": 291456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471324.804, "dur": 0.383, + "args": { + "External id": 291457,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471328.259, "dur": 0.535, + "args": { + "External id": 291458,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471329.682, "dur": 0.281, + "args": { + "External id": 291459,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367471330.828, "dur": 2.390, + "args": { + "External id": 291460,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367471352.360, "dur": 27.168, + "args": { + "External id": 291461,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367471444.932, "dur": 400.372, + "args": { + "External id": 291462,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367471476.760, "dur": 362.627, + "args": { + "External id": 291463,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6995, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367471487.063, "dur": 345.948, + "args": { + "External id": 291464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367471872.046, "dur": 2.533, + "args": { + "External id": 291465,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6997, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070547, "tid": 2070547, + "ts": 5333367471973.933, "dur": 16983.487, + "args": { + "External id": 291466,"Record function id": 0, "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472074.788, "dur": 6.947, + "args": { + "External id": 291467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472084.832, "dur": 1.323, + "args": { + "External id": 291468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472088.173, "dur": 1.746, + "args": { + "External id": 291469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472091.903, "dur": 1.012, + "args": { + "External id": 291470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472094.455, "dur": 0.838, + "args": { + "External id": 291471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472096.537, "dur": 1.112, + "args": { + "External id": 291472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472101.275, "dur": 0.807, + "args": { + "External id": 291473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472103.637, "dur": 2.127, + "args": { + "External id": 291474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472107.167, "dur": 0.870, + "args": { + "External id": 291475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367472109.217, "dur": 0.789, + "args": { + "External id": 291476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367472130.859, "dur": 16771.303, + "args": { + "External id": 291477,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367472146.721, "dur": 16745.069, + "args": { + "External id": 291478,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367472165.211, "dur": 36.876, + "args": { + "External id": 291479,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367472208.310, "dur": 16643.996, + "args": { + "External id": 291480,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367472211.058, "dur": 16640.445, + "args": { + "External id": 291481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367472218.487, "dur": 7.161, + "args": { + "External id": 291482,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367472228.049, "dur": 16619.511, + "args": { + "External id": 291483,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367489102.871, "dur": 36.233, + "args": { + "External id": 291484,"Sequence number": 1209213, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7016 + } + }, + { + "ph": "s", "id": 19, "pid": 2070547, "tid": 2070547, "ts": 5333367489102.871, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367489123.461, "dur": 10.460, + "args": { + "External id": 291485,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367489128.262, "dur": 5.499, + "args": { + "External id": 291486,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367489232.979, "dur": 94.790, + "args": { + "External id": 291487,"Record function id": 0, "Ev Idx": 7019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367489329.513, "dur": 1139.905, + "args": { + "External id": 291488,"Record function id": 0, "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367489375.548, "dur": 1079.318, + "args": { + "External id": 291489,"Sequence number": 1209214, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7021 + } + }, + { + "ph": "s", "id": 18, "pid": 2070547, "tid": 2070547, "ts": 5333367489375.548, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367489447.973, "dur": 50.257, + "args": { + "External id": 291490,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367489510.829, "dur": 104.700, + "args": { + "External id": 291491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367489668.905, "dur": 46.008, + "args": { + "External id": 291492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367489725.454, "dur": 31.133, + "args": { + "External id": 291493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367489784.258, "dur": 29.418, + "args": { + "External id": 291494,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367489830.357, "dur": 15.985, + "args": { + "External id": 291495,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367489866.763, "dur": 130.001, + "args": { + "External id": 291496,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367489919.872, "dur": 11.436, + "args": { + "External id": 291497,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367489924.975, "dur": 5.523, + "args": { + "External id": 291498,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367489934.090, "dur": 4.020, + "args": { + "External id": 291499,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367489939.632, "dur": 1.242, + "args": { + "External id": 291500,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367489943.140, "dur": 2.481, + "args": { + "External id": 291501,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367490007.536, "dur": 44.408, + "args": { + "External id": 291502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367490082.391, "dur": 29.931, + "args": { + "External id": 291503,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367490118.728, "dur": 41.219, + "args": { + "External id": 291504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367490186.489, "dur": 53.486, + "args": { + "External id": 291505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367490268.574, "dur": 30.163, + "args": { + "External id": 291506,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367490304.664, "dur": 36.282, + "args": { + "External id": 291507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367490361.713, "dur": 19.018, + "args": { + "External id": 291508,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.26)", "pid": 2070547, "tid": 2070547, + "ts": 5333367490534.138, "dur": 72.826, + "args": { + "External id": 291509,"Record function id": 0, "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367490723.395, "dur": 47.520, + "args": { + "External id": 291510,"Record function id": 0, "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.27)", "pid": 2070547, "tid": 2070547, + "ts": 5333367490781.329, "dur": 18157.321, + "args": { + "External id": 291511,"Record function id": 0, "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070547, "tid": 2070547, + "ts": 5333367490789.874, "dur": 865.562, + "args": { + "External id": 291512,"Record function id": 0, "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367490872.718, "dur": 10.318, + "args": { + "External id": 291513,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367490896.182, "dur": 34.815, + "args": { + "External id": 291514,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490902.125, "dur": 2.213, + "args": { + "External id": 291515,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490908.219, "dur": 0.832, + "args": { + "External id": 291516,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490909.991, "dur": 0.755, + "args": { + "External id": 291517,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490911.697, "dur": 0.862, + "args": { + "External id": 291518,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490915.430, "dur": 0.598, + "args": { + "External id": 291519,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490916.876, "dur": 0.444, + "args": { + "External id": 291520,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490918.201, "dur": 2.648, + "args": { + "External id": 291521,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490921.787, "dur": 0.498, + "args": { + "External id": 291522,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367490923.255, "dur": 0.584, + "args": { + "External id": 291523,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367490942.694, "dur": 44.290, + "args": { + "External id": 291524,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367491020.081, "dur": 113.748, + "args": { + "External id": 291525,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367491030.586, "dur": 4.810, + "args": { + "External id": 291526,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367491040.468, "dur": 9.980, + "args": { + "External id": 291527,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367491045.025, "dur": 5.031, + "args": { + "External id": 291528,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491048.034, "dur": 0.795, + "args": { + "External id": 291529,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367491057.068, "dur": 28.644, + "args": { + "External id": 291530,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491059.230, "dur": 2.697, + "args": { + "External id": 291531,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491062.903, "dur": 0.374, + "args": { + "External id": 291532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491064.140, "dur": 0.498, + "args": { + "External id": 291533,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491067.667, "dur": 0.467, + "args": { + "External id": 291534,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491069.376, "dur": 0.438, + "args": { + "External id": 291535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491070.916, "dur": 0.487, + "args": { + "External id": 291536,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491074.783, "dur": 0.474, + "args": { + "External id": 291537,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491076.680, "dur": 0.449, + "args": { + "External id": 291538,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491078.498, "dur": 2.495, + "args": { + "External id": 291539,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367491101.240, "dur": 24.882, + "args": { + "External id": 291540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367491213.411, "dur": 310.419, + "args": { + "External id": 291541,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367491248.363, "dur": 270.378, + "args": { + "External id": 291542,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7074, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367491261.467, "dur": 252.017, + "args": { + "External id": 291543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367491545.833, "dur": 2.140, + "args": { + "External id": 291544,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7076, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070547, "tid": 2070547, + "ts": 5333367491681.538, "dur": 17041.521, + "args": { + "External id": 291545,"Record function id": 0, "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491788.116, "dur": 6.938, + "args": { + "External id": 291546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491798.566, "dur": 1.247, + "args": { + "External id": 291547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491801.663, "dur": 1.073, + "args": { + "External id": 291548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491804.772, "dur": 1.174, + "args": { + "External id": 291549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491807.468, "dur": 1.495, + "args": { + "External id": 291550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491810.594, "dur": 0.871, + "args": { + "External id": 291551,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491814.961, "dur": 1.363, + "args": { + "External id": 291552,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491817.723, "dur": 2.260, + "args": { + "External id": 291553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491821.637, "dur": 1.543, + "args": { + "External id": 291554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367491824.990, "dur": 1.055, + "args": { + "External id": 291555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367491845.876, "dur": 16821.433, + "args": { + "External id": 291556,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367491862.581, "dur": 16794.637, + "args": { + "External id": 291557,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367491881.255, "dur": 14.428, + "args": { + "External id": 291558,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367491899.340, "dur": 16683.147, + "args": { + "External id": 291559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367491901.549, "dur": 16679.995, + "args": { + "External id": 291560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367491908.037, "dur": 5.671, + "args": { + "External id": 291561,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367491915.504, "dur": 16661.968, + "args": { + "External id": 291562,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367508873.752, "dur": 38.727, + "args": { + "External id": 291563,"Sequence number": 1209215, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7095 + } + }, + { + "ph": "s", "id": 17, "pid": 2070547, "tid": 2070547, "ts": 5333367508873.752, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367508896.707, "dur": 10.922, + "args": { + "External id": 291564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367508901.432, "dur": 5.951, + "args": { + "External id": 291565,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367508979.211, "dur": 82.744, + "args": { + "External id": 291566,"Record function id": 0, "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367509063.422, "dur": 1180.636, + "args": { + "External id": 291567,"Record function id": 0, "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367509104.868, "dur": 1119.262, + "args": { + "External id": 291568,"Sequence number": 1209216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7100 + } + }, + { + "ph": "s", "id": 16, "pid": 2070547, "tid": 2070547, "ts": 5333367509104.868, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367509195.860, "dur": 60.085, + "args": { + "External id": 291569,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367509276.659, "dur": 108.662, + "args": { + "External id": 291570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367509395.574, "dur": 37.789, + "args": { + "External id": 291571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367509442.498, "dur": 30.519, + "args": { + "External id": 291572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367509507.577, "dur": 34.789, + "args": { + "External id": 291573,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367509557.900, "dur": 15.483, + "args": { + "External id": 291574,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367509592.151, "dur": 183.798, + "args": { + "External id": 291575,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367509686.881, "dur": 13.586, + "args": { + "External id": 291576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367509691.976, "dur": 7.426, + "args": { + "External id": 291577,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367509705.231, "dur": 4.658, + "args": { + "External id": 291578,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367509711.147, "dur": 2.011, + "args": { + "External id": 291579,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367509717.674, "dur": 3.965, + "args": { + "External id": 291580,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367509788.668, "dur": 53.796, + "args": { + "External id": 291581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367509879.625, "dur": 30.648, + "args": { + "External id": 291582,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367509918.951, "dur": 42.213, + "args": { + "External id": 291583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367509968.358, "dur": 33.484, + "args": { + "External id": 291584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367510023.972, "dur": 28.926, + "args": { + "External id": 291585,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367510059.128, "dur": 34.073, + "args": { + "External id": 291586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367510112.846, "dur": 19.391, + "args": { + "External id": 291587,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.27)", "pid": 2070547, "tid": 2070547, + "ts": 5333367510316.301, "dur": 77.510, + "args": { + "External id": 291588,"Record function id": 0, "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367510470.347, "dur": 46.721, + "args": { + "External id": 291589,"Record function id": 0, "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.28)", "pid": 2070547, "tid": 2070547, + "ts": 5333367510525.993, "dur": 18226.645, + "args": { + "External id": 291590,"Record function id": 0, "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070547, "tid": 2070547, + "ts": 5333367510533.307, "dur": 899.269, + "args": { + "External id": 291591,"Record function id": 0, "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367510616.520, "dur": 48.221, + "args": { + "External id": 291592,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367510681.186, "dur": 35.849, + "args": { + "External id": 291593,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510686.417, "dur": 2.514, + "args": { + "External id": 291594,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510693.129, "dur": 0.697, + "args": { + "External id": 291595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510695.266, "dur": 0.899, + "args": { + "External id": 291596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510697.566, "dur": 0.701, + "args": { + "External id": 291597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510700.539, "dur": 0.677, + "args": { + "External id": 291598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510702.885, "dur": 1.138, + "args": { + "External id": 291599,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510704.814, "dur": 1.580, + "args": { + "External id": 291600,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510708.128, "dur": 0.493, + "args": { + "External id": 291601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510709.994, "dur": 0.465, + "args": { + "External id": 291602,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367510729.363, "dur": 45.627, + "args": { + "External id": 291603,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367510809.257, "dur": 110.756, + "args": { + "External id": 291604,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367510820.798, "dur": 4.242, + "args": { + "External id": 291605,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367510830.178, "dur": 10.901, + "args": { + "External id": 291606,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367510834.955, "dur": 5.674, + "args": { + "External id": 291607,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510838.471, "dur": 0.725, + "args": { + "External id": 291608,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367510847.702, "dur": 27.514, + "args": { + "External id": 291609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510849.533, "dur": 2.275, + "args": { + "External id": 291610,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510853.485, "dur": 0.677, + "args": { + "External id": 291611,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510855.442, "dur": 0.676, + "args": { + "External id": 291612,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510858.906, "dur": 0.616, + "args": { + "External id": 291613,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510860.898, "dur": 0.951, + "args": { + "External id": 291614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510862.997, "dur": 0.392, + "args": { + "External id": 291615,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510865.427, "dur": 0.482, + "args": { + "External id": 291616,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510867.102, "dur": 0.430, + "args": { + "External id": 291617,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367510868.484, "dur": 1.998, + "args": { + "External id": 291618,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367510889.317, "dur": 22.351, + "args": { + "External id": 291619,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367510975.133, "dur": 353.359, + "args": { + "External id": 291620,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367511007.763, "dur": 314.859, + "args": { + "External id": 291621,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7153, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367511018.494, "dur": 295.254, + "args": { + "External id": 291622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367511355.285, "dur": 2.388, + "args": { + "External id": 291623,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7155, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070547, "tid": 2070547, + "ts": 5333367511454.688, "dur": 17039.156, + "args": { + "External id": 291624,"Record function id": 0, "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511556.159, "dur": 6.809, + "args": { + "External id": 291625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511566.384, "dur": 1.180, + "args": { + "External id": 291626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511569.209, "dur": 1.458, + "args": { + "External id": 291627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511572.505, "dur": 1.263, + "args": { + "External id": 291628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511575.389, "dur": 1.678, + "args": { + "External id": 291629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511578.524, "dur": 1.363, + "args": { + "External id": 291630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511583.877, "dur": 1.521, + "args": { + "External id": 291631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511587.790, "dur": 2.479, + "args": { + "External id": 291632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511591.671, "dur": 1.148, + "args": { + "External id": 291633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367511594.364, "dur": 1.065, + "args": { + "External id": 291634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367511616.060, "dur": 16817.526, + "args": { + "External id": 291635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367511667.343, "dur": 16756.811, + "args": { + "External id": 291636,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367511685.232, "dur": 15.056, + "args": { + "External id": 291637,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367511704.152, "dur": 16680.968, + "args": { + "External id": 291638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367511707.017, "dur": 16677.381, + "args": { + "External id": 291639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367511712.113, "dur": 6.069, + "args": { + "External id": 291640,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367511720.244, "dur": 16660.284, + "args": { + "External id": 291641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367528690.155, "dur": 33.977, + "args": { + "External id": 291642,"Sequence number": 1209217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7174 + } + }, + { + "ph": "s", "id": 15, "pid": 2070547, "tid": 2070547, "ts": 5333367528690.155, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367528708.198, "dur": 10.951, + "args": { + "External id": 291643,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367528712.514, "dur": 6.303, + "args": { + "External id": 291644,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367528794.483, "dur": 86.529, + "args": { + "External id": 291645,"Record function id": 0, "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367528882.516, "dur": 1154.846, + "args": { + "External id": 291646,"Record function id": 0, "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367528925.609, "dur": 1097.131, + "args": { + "External id": 291647,"Sequence number": 1209218, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7179 + } + }, + { + "ph": "s", "id": 14, "pid": 2070547, "tid": 2070547, "ts": 5333367528925.609, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367528995.539, "dur": 48.965, + "args": { + "External id": 291648,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367529058.686, "dur": 103.141, + "args": { + "External id": 291649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367529195.712, "dur": 55.822, + "args": { + "External id": 291650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367529264.926, "dur": 39.240, + "args": { + "External id": 291651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367529336.832, "dur": 30.141, + "args": { + "External id": 291652,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367529386.782, "dur": 16.925, + "args": { + "External id": 291653,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367529423.800, "dur": 135.299, + "args": { + "External id": 291654,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367529476.239, "dur": 13.070, + "args": { + "External id": 291655,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367529481.865, "dur": 6.507, + "args": { + "External id": 291656,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367529492.466, "dur": 4.827, + "args": { + "External id": 291657,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367529498.538, "dur": 1.460, + "args": { + "External id": 291658,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367529502.618, "dur": 4.524, + "args": { + "External id": 291659,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367529570.567, "dur": 46.957, + "args": { + "External id": 291660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367529693.735, "dur": 30.709, + "args": { + "External id": 291661,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367529733.509, "dur": 46.172, + "args": { + "External id": 291662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367529788.286, "dur": 34.426, + "args": { + "External id": 291663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367529845.437, "dur": 27.320, + "args": { + "External id": 291664,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367529879.526, "dur": 33.684, + "args": { + "External id": 291665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367529935.764, "dur": 20.110, + "args": { + "External id": 291666,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.28)", "pid": 2070547, "tid": 2070547, + "ts": 5333367530103.096, "dur": 99.758, + "args": { + "External id": 291667,"Record function id": 0, "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367530292.678, "dur": 47.573, + "args": { + "External id": 291668,"Record function id": 0, "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.29)", "pid": 2070547, "tid": 2070547, + "ts": 5333367530350.074, "dur": 18142.414, + "args": { + "External id": 291669,"Record function id": 0, "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070547, "tid": 2070547, + "ts": 5333367530357.349, "dur": 855.295, + "args": { + "External id": 291670,"Record function id": 0, "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367530439.503, "dur": 10.598, + "args": { + "External id": 291671,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367530463.565, "dur": 34.065, + "args": { + "External id": 291672,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530469.049, "dur": 2.476, + "args": { + "External id": 291673,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530476.030, "dur": 0.572, + "args": { + "External id": 291674,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530477.611, "dur": 0.781, + "args": { + "External id": 291675,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530479.853, "dur": 0.394, + "args": { + "External id": 291676,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530482.440, "dur": 0.631, + "args": { + "External id": 291677,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530484.186, "dur": 0.907, + "args": { + "External id": 291678,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530485.968, "dur": 1.728, + "args": { + "External id": 291679,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530489.080, "dur": 0.856, + "args": { + "External id": 291680,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530490.760, "dur": 0.688, + "args": { + "External id": 291681,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367530509.818, "dur": 47.398, + "args": { + "External id": 291682,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367530589.537, "dur": 157.809, + "args": { + "External id": 291683,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367530599.699, "dur": 4.622, + "args": { + "External id": 291684,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367530609.471, "dur": 46.483, + "args": { + "External id": 291685,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367530614.201, "dur": 40.899, + "args": { + "External id": 291686,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530617.331, "dur": 0.762, + "args": { + "External id": 291687,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367530665.717, "dur": 28.962, + "args": { + "External id": 291688,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530668.408, "dur": 2.355, + "args": { + "External id": 291689,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530671.995, "dur": 0.651, + "args": { + "External id": 291690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530673.775, "dur": 0.465, + "args": { + "External id": 291691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530676.950, "dur": 0.470, + "args": { + "External id": 291692,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530679.045, "dur": 0.615, + "args": { + "External id": 291693,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530680.705, "dur": 0.884, + "args": { + "External id": 291694,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530684.960, "dur": 0.576, + "args": { + "External id": 291695,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530686.268, "dur": 0.661, + "args": { + "External id": 291696,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367530688.193, "dur": 1.843, + "args": { + "External id": 291697,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367530710.666, "dur": 27.769, + "args": { + "External id": 291698,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367530803.055, "dur": 296.411, + "args": { + "External id": 291699,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367530835.306, "dur": 259.538, + "args": { + "External id": 291700,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7232, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367530845.966, "dur": 243.399, + "args": { + "External id": 291701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367531120.013, "dur": 2.327, + "args": { + "External id": 291702,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7234, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070547, "tid": 2070547, + "ts": 5333367531241.896, "dur": 17035.721, + "args": { + "External id": 291703,"Record function id": 0, "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531350.279, "dur": 6.836, + "args": { + "External id": 291704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531360.515, "dur": 1.005, + "args": { + "External id": 291705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531363.313, "dur": 0.875, + "args": { + "External id": 291706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531366.336, "dur": 1.352, + "args": { + "External id": 291707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531369.193, "dur": 0.850, + "args": { + "External id": 291708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531371.413, "dur": 1.354, + "args": { + "External id": 291709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531376.050, "dur": 1.584, + "args": { + "External id": 291710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531379.189, "dur": 2.985, + "args": { + "External id": 291711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531383.567, "dur": 0.877, + "args": { + "External id": 291712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367531386.061, "dur": 0.744, + "args": { + "External id": 291713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367531417.958, "dur": 16805.383, + "args": { + "External id": 291714,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367531434.138, "dur": 16778.288, + "args": { + "External id": 291715,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367531450.593, "dur": 15.293, + "args": { + "External id": 291716,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367531469.767, "dur": 16684.013, + "args": { + "External id": 291717,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367531472.014, "dur": 16680.972, + "args": { + "External id": 291718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367531477.478, "dur": 6.278, + "args": { + "External id": 291719,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367531485.473, "dur": 16663.524, + "args": { + "External id": 291720,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367548429.551, "dur": 37.085, + "args": { + "External id": 291721,"Sequence number": 1209219, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7253 + } + }, + { + "ph": "s", "id": 13, "pid": 2070547, "tid": 2070547, "ts": 5333367548429.551, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367548450.935, "dur": 10.790, + "args": { + "External id": 291722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367548455.448, "dur": 6.011, + "args": { + "External id": 291723,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367548533.671, "dur": 121.878, + "args": { + "External id": 291724,"Record function id": 0, "Ev Idx": 7256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367548659.169, "dur": 1169.062, + "args": { + "External id": 291725,"Record function id": 0, "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367548706.867, "dur": 1106.416, + "args": { + "External id": 291726,"Sequence number": 1209220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7258 + } + }, + { + "ph": "s", "id": 12, "pid": 2070547, "tid": 2070547, "ts": 5333367548706.867, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367548781.940, "dur": 49.127, + "args": { + "External id": 291727,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367548845.927, "dur": 107.584, + "args": { + "External id": 291728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367548963.089, "dur": 40.143, + "args": { + "External id": 291729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367549009.723, "dur": 30.742, + "args": { + "External id": 291730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367549069.438, "dur": 27.911, + "args": { + "External id": 291731,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367549113.767, "dur": 17.306, + "args": { + "External id": 291732,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367549147.595, "dur": 178.557, + "args": { + "External id": 291733,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367549226.784, "dur": 17.227, + "args": { + "External id": 291734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367549232.981, "dur": 9.656, + "args": { + "External id": 291735,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367549247.809, "dur": 6.115, + "args": { + "External id": 291736,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367549255.701, "dur": 3.303, + "args": { + "External id": 291737,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367549262.481, "dur": 4.756, + "args": { + "External id": 291738,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367549339.342, "dur": 57.883, + "args": { + "External id": 291739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367549437.239, "dur": 33.786, + "args": { + "External id": 291740,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367549481.632, "dur": 41.299, + "args": { + "External id": 291741,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367549530.593, "dur": 34.986, + "args": { + "External id": 291742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367549588.823, "dur": 25.206, + "args": { + "External id": 291743,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367549658.236, "dur": 42.225, + "args": { + "External id": 291744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367549722.451, "dur": 23.215, + "args": { + "External id": 291745,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.29)", "pid": 2070547, "tid": 2070547, + "ts": 5333367549896.147, "dur": 74.837, + "args": { + "External id": 291746,"Record function id": 0, "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367550043.968, "dur": 45.606, + "args": { + "External id": 291747,"Record function id": 0, "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.30)", "pid": 2070547, "tid": 2070547, + "ts": 5333367550098.797, "dur": 18667.665, + "args": { + "External id": 291748,"Record function id": 0, "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070547, "tid": 2070547, + "ts": 5333367550106.932, "dur": 884.241, + "args": { + "External id": 291749,"Record function id": 0, "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367550212.896, "dur": 12.315, + "args": { + "External id": 291750,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367550242.199, "dur": 37.659, + "args": { + "External id": 291751,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550248.415, "dur": 2.758, + "args": { + "External id": 291752,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550255.593, "dur": 0.818, + "args": { + "External id": 291753,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550258.411, "dur": 0.614, + "args": { + "External id": 291754,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550260.401, "dur": 0.962, + "args": { + "External id": 291755,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550264.164, "dur": 0.587, + "args": { + "External id": 291756,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550266.136, "dur": 0.563, + "args": { + "External id": 291757,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550268.288, "dur": 1.438, + "args": { + "External id": 291758,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550270.891, "dur": 0.559, + "args": { + "External id": 291759,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550272.535, "dur": 0.520, + "args": { + "External id": 291760,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367550291.551, "dur": 46.236, + "args": { + "External id": 291761,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367550372.002, "dur": 114.425, + "args": { + "External id": 291762,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367550382.863, "dur": 4.520, + "args": { + "External id": 291763,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367550392.406, "dur": 10.189, + "args": { + "External id": 291764,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367550396.948, "dur": 5.218, + "args": { + "External id": 291765,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550400.253, "dur": 0.781, + "args": { + "External id": 291766,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367550409.923, "dur": 30.469, + "args": { + "External id": 291767,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550412.389, "dur": 2.773, + "args": { + "External id": 291768,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550416.667, "dur": 0.467, + "args": { + "External id": 291769,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550418.906, "dur": 0.803, + "args": { + "External id": 291770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550421.685, "dur": 0.568, + "args": { + "External id": 291771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550423.915, "dur": 1.271, + "args": { + "External id": 291772,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550426.517, "dur": 0.657, + "args": { + "External id": 291773,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550429.820, "dur": 0.916, + "args": { + "External id": 291774,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550432.140, "dur": 0.384, + "args": { + "External id": 291775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367550434.195, "dur": 1.970, + "args": { + "External id": 291776,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367550454.282, "dur": 23.371, + "args": { + "External id": 291777,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367550542.506, "dur": 348.680, + "args": { + "External id": 291778,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367550572.782, "dur": 313.048, + "args": { + "External id": 291779,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7311, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367550583.130, "dur": 296.771, + "args": { + "External id": 291780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367550917.889, "dur": 2.507, + "args": { + "External id": 291781,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7313, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070547, "tid": 2070547, + "ts": 5333367551012.666, "dur": 17334.862, + "args": { + "External id": 291782,"Record function id": 0, "Ev Idx": 7314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551110.409, "dur": 6.467, + "args": { + "External id": 291783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551120.558, "dur": 1.424, + "args": { + "External id": 291784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551123.988, "dur": 1.262, + "args": { + "External id": 291785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551126.876, "dur": 0.938, + "args": { + "External id": 291786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551129.519, "dur": 1.319, + "args": { + "External id": 291787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551132.467, "dur": 0.995, + "args": { + "External id": 291788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551137.813, "dur": 0.962, + "args": { + "External id": 291789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551140.017, "dur": 2.116, + "args": { + "External id": 291790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551143.849, "dur": 0.965, + "args": { + "External id": 291791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367551146.056, "dur": 0.769, + "args": { + "External id": 291792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367551182.596, "dur": 17039.848, + "args": { + "External id": 291793,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367551203.735, "dur": 16994.499, + "args": { + "External id": 291794,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367551224.090, "dur": 16.470, + "args": { + "External id": 291795,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367551245.466, "dur": 16861.756, + "args": { + "External id": 291796,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367551248.367, "dur": 16856.898, + "args": { + "External id": 291797,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367551254.664, "dur": 8.508, + "args": { + "External id": 291798,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367551265.533, "dur": 16828.171, + "args": { + "External id": 291799,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367568660.221, "dur": 68.584, + "args": { + "External id": 291800,"Sequence number": 1209221, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7332 + } + }, + { + "ph": "s", "id": 11, "pid": 2070547, "tid": 2070547, "ts": 5333367568660.221, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367568702.456, "dur": 20.438, + "args": { + "External id": 291801,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367568712.021, "dur": 10.349, + "args": { + "External id": 291802,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367568841.183, "dur": 191.423, + "args": { + "External id": 291803,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367569034.778, "dur": 1378.246, + "args": { + "External id": 291804,"Record function id": 0, "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367569102.914, "dur": 1293.059, + "args": { + "External id": 291805,"Sequence number": 1209222, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7337 + } + }, + { + "ph": "s", "id": 10, "pid": 2070547, "tid": 2070547, "ts": 5333367569102.914, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367569223.016, "dur": 70.644, + "args": { + "External id": 291806,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367569311.749, "dur": 141.050, + "args": { + "External id": 291807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367569462.575, "dur": 37.766, + "args": { + "External id": 291808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367569511.084, "dur": 31.865, + "args": { + "External id": 291809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367569577.198, "dur": 32.124, + "args": { + "External id": 291810,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367569662.494, "dur": 19.470, + "args": { + "External id": 291811,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367569709.913, "dur": 181.370, + "args": { + "External id": 291812,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367569794.523, "dur": 16.077, + "args": { + "External id": 291813,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367569801.166, "dur": 8.388, + "args": { + "External id": 291814,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367569814.555, "dur": 5.702, + "args": { + "External id": 291815,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367569821.476, "dur": 1.249, + "args": { + "External id": 291816,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367569825.470, "dur": 5.453, + "args": { + "External id": 291817,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367569902.608, "dur": 52.484, + "args": { + "External id": 291818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367569991.562, "dur": 37.956, + "args": { + "External id": 291819,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367570038.090, "dur": 41.821, + "args": { + "External id": 291820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367570090.196, "dur": 34.371, + "args": { + "External id": 291821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367570154.715, "dur": 53.366, + "args": { + "External id": 291822,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367570216.517, "dur": 39.948, + "args": { + "External id": 291823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367570282.688, "dur": 21.342, + "args": { + "External id": 291824,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.30)", "pid": 2070547, "tid": 2070547, + "ts": 5333367570505.892, "dur": 102.156, + "args": { + "External id": 291825,"Record function id": 0, "Ev Idx": 7357 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070547, "tid": 2070547, + "ts": 5333367570744.720, "dur": 53.919, + "args": { + "External id": 291826,"Record function id": 0, "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.31)", "pid": 2070547, "tid": 2070547, + "ts": 5333367570809.860, "dur": 16741.310, + "args": { + "External id": 291827,"Record function id": 0, "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.31)", "pid": 2070547, "tid": 2070547, + "ts": 5333367570825.576, "dur": 1183.574, + "args": { + "External id": 291828,"Record function id": 0, "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367570933.326, "dur": 11.336, + "args": { + "External id": 291829,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367570964.669, "dur": 49.076, + "args": { + "External id": 291830,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367570972.845, "dur": 2.823, + "args": { + "External id": 291831,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367570981.255, "dur": 0.544, + "args": { + "External id": 291832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367570983.054, "dur": 0.647, + "args": { + "External id": 291833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367570985.017, "dur": 1.148, + "args": { + "External id": 291834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367570990.226, "dur": 1.152, + "args": { + "External id": 291835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367570993.321, "dur": 0.942, + "args": { + "External id": 291836,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367570995.656, "dur": 2.815, + "args": { + "External id": 291837,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571000.100, "dur": 0.897, + "args": { + "External id": 291838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571002.261, "dur": 0.912, + "args": { + "External id": 291839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367571028.817, "dur": 51.989, + "args": { + "External id": 291840,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070547, "tid": 2070547, + "ts": 5333367571155.915, "dur": 165.637, + "args": { + "External id": 291841,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "0", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367571186.780, "dur": 9.488, + "args": { + "External id": 291842,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070547, "tid": 2070547, + "ts": 5333367571204.370, "dur": 14.249, + "args": { + "External id": 291843,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367571210.799, "dur": 7.276, + "args": { + "External id": 291844,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "6423040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571215.094, "dur": 0.854, + "args": { + "External id": 291845,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070547, "tid": 2070547, + "ts": 5333367571226.409, "dur": 35.385, + "args": { + "External id": 291846,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571228.845, "dur": 0.651, + "args": { + "External id": 291847,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571231.294, "dur": 4.015, + "args": { + "External id": 291848,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571236.937, "dur": 0.518, + "args": { + "External id": 291849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571238.611, "dur": 0.707, + "args": { + "External id": 291850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571243.916, "dur": 0.986, + "args": { + "External id": 291851,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571246.252, "dur": 0.567, + "args": { + "External id": 291852,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571247.583, "dur": 0.496, + "args": { + "External id": 291853,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571252.434, "dur": 0.627, + "args": { + "External id": 291854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367571253.884, "dur": 0.642, + "args": { + "External id": 291855,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367571280.650, "dur": 31.700, + "args": { + "External id": 291856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070547, "tid": 2070547, + "ts": 5333367571391.154, "dur": 494.141, + "args": { + "External id": 291857,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367571431.445, "dur": 447.259, + "args": { + "External id": 291858,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 0, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7390, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070547, "tid": 2070547, + "ts": 5333367571445.722, "dur": 424.961, + "args": { + "External id": 291859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333367571919.839, "dur": 2.826, + "args": { + "External id": 291860,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7392, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.31)", "pid": 2070547, "tid": 2070547, + "ts": 5333367572037.364, "dur": 15281.728, + "args": { + "External id": 291861,"Record function id": 0, "Ev Idx": 7393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572155.658, "dur": 7.004, + "args": { + "External id": 291862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572181.198, "dur": 2.380, + "args": { + "External id": 291863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572187.032, "dur": 1.064, + "args": { + "External id": 291864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572189.935, "dur": 1.445, + "args": { + "External id": 291865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572192.760, "dur": 1.621, + "args": { + "External id": 291866,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572195.694, "dur": 1.194, + "args": { + "External id": 291867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572200.657, "dur": 1.362, + "args": { + "External id": 291868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572203.780, "dur": 4.157, + "args": { + "External id": 291869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572209.220, "dur": 1.437, + "args": { + "External id": 291870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367572212.295, "dur": 1.217, + "args": { + "External id": 291871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367572241.238, "dur": 15030.641, + "args": { + "External id": 291872,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367572259.799, "dur": 15003.690, + "args": { + "External id": 291873,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367572280.360, "dur": 19.081, + "args": { + "External id": 291874,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367572304.530, "dur": 14923.895, + "args": { + "External id": 291875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367572307.984, "dur": 14919.634, + "args": { + "External id": 291876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367572314.948, "dur": 7.234, + "args": { + "External id": 291877,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367572324.719, "dur": 14899.566, + "args": { + "External id": 291878,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367587482.395, "dur": 42.107, + "args": { + "External id": 291879,"Sequence number": 1209223, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7411 + } + }, + { + "ph": "s", "id": 9, "pid": 2070547, "tid": 2070547, "ts": 5333367587482.395, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333367587510.601, "dur": 9.337, + "args": { + "External id": 291880,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367587514.457, "dur": 5.168, + "args": { + "External id": 291881,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367587595.090, "dur": 128.189, + "args": { + "External id": 291882,"Record function id": 0, "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070547, "tid": 2070547, + "ts": 5333367587726.517, "dur": 1157.669, + "args": { + "External id": 291883,"Record function id": 0, "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367587777.775, "dur": 1092.015, + "args": { + "External id": 291884,"Sequence number": 1209224, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7416 + } + }, + { + "ph": "s", "id": 8, "pid": 2070547, "tid": 2070547, "ts": 5333367587777.775, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367587855.268, "dur": 49.379, + "args": { + "External id": 291885,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367587918.600, "dur": 108.071, + "args": { + "External id": 291886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367588040.171, "dur": 39.922, + "args": { + "External id": 291887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367588087.484, "dur": 31.238, + "args": { + "External id": 291888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367588154.760, "dur": 48.042, + "args": { + "External id": 291889,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367588221.744, "dur": 15.759, + "args": { + "External id": 291890,"kernel_hash": "cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ex/cex6azrfnirqh3jognpirazamj4oyxhnk3mxs7t2vmitx2fbo554.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367588257.961, "dur": 140.671, + "args": { + "External id": 291891,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367588314.770, "dur": 13.710, + "args": { + "External id": 291892,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367588320.166, "dur": 7.497, + "args": { + "External id": 291893,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367588331.603, "dur": 5.317, + "args": { + "External id": 291894,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367588338.199, "dur": 1.491, + "args": { + "External id": 291895,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367588344.173, "dur": 4.105, + "args": { + "External id": 291896,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367588409.472, "dur": 50.103, + "args": { + "External id": 291897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070547, "tid": 2070547, + "ts": 5333367588493.800, "dur": 31.647, + "args": { + "External id": 291898,"kernel_hash": "cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/w7/cw7dgbt4tjenpklhtvob3eghj7bbox7g2wxbo732slxqx2vrsyd5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367588533.459, "dur": 41.413, + "args": { + "External id": 291899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367588582.007, "dur": 34.281, + "args": { + "External id": 291900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367588678.592, "dur": 33.291, + "args": { + "External id": 291901,"kernel_hash": "czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/zh/czh4pfojeoszm3aid3si2yxrotnl4lfradxhc2mhjrcx6kyggzha.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367588719.261, "dur": 38.674, + "args": { + "External id": 291902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070547, "tid": 2070547, + "ts": 5333367588778.892, "dur": 22.495, + "args": { + "External id": 291903,"kernel_hash": "cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/nz/cnzai7gm7fkymyw2bih55vxu3i63al2yxbe4zf3zad5ca4tx2gz4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.31)", "pid": 2070547, "tid": 2070547, + "ts": 5333367588951.962, "dur": 34.212, + "args": { + "External id": 291904,"Record function id": 0, "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070547, "tid": 2070547, + "ts": 5333367589065.777, "dur": 50.092, + "args": { + "External id": 291905,"Record function id": 0, "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2070547, "tid": 2070547, + "ts": 5333367589117.148, "dur": 216.097, + "args": { + "External id": 291906,"Record function id": 0, "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367589152.632, "dur": 172.497, + "args": { + "External id": 291907,"Sequence number": 1209225, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [8388608, 2048, 1]], "Input Dims": [[2048], [16, 4096, 2048]], "Ev Idx": 7439 + } + }, + { + "ph": "s", "id": 7, "pid": 2070547, "tid": 2070547, "ts": 5333367589152.632, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070547, "tid": 2070547, + "ts": 5333367589245.120, "dur": 39.522, + "args": { + "External id": 291908,"kernel_hash": "cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ml/cmljljlwjilefo6ueuqn5gaz5uvsdhhmauseg2vxhp3egffs57dc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367589408.656, "dur": 0.728, + "args": { + "External id": 291909,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367589417.663, "dur": 13.364, + "args": { + "External id": 291910,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589424.837, "dur": 2.923, + "args": { + "External id": 291911,"Record function id": 0, "Concrete Inputs": ["", "[16, 8191]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367589436.112, "dur": 3.538, + "args": { + "External id": 291912,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589437.719, "dur": 1.216, + "args": { + "External id": 291913,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367589440.933, "dur": 4.054, + "args": { + "External id": 291914,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589442.685, "dur": 0.998, + "args": { + "External id": 291915,"Record function id": 0, "Concrete Inputs": ["", "[16, 1]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::full_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367589453.690, "dur": 47.564, + "args": { + "External id": 291916,"Record function id": 0, "Concrete Inputs": ["", "-100", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367589456.009, "dur": 13.186, + "args": { + "External id": 291917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["long int", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589462.788, "dur": 5.890, + "args": { + "External id": 291918,"Record function id": 0, "Concrete Inputs": ["[16, 1]", "[1, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589472.002, "dur": 28.794, + "args": { + "External id": 291919,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1, 1], []], "Input Dims": [[16, 1], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070547, "tid": 2070547, + "ts": 5333367589510.362, "dur": 35.104, + "args": { + "External id": 291920,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8192, 1], [1, 1]], []], "Input Dims": [[[16, 8191], [16, 1]], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367589553.602, "dur": 3.861, + "args": { + "External id": 291921,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589555.654, "dur": 1.077, + "args": { + "External id": 291922,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070547, "tid": 2070547, + "ts": 5333367589564.131, "dur": 39.226, + "args": { + "External id": 291923,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070547, "tid": 2070547, + "ts": 5333367589566.562, "dur": 36.559, + "args": { + "External id": 291924,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367589568.662, "dur": 8.011, + "args": { + "External id": 291925,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367589573.153, "dur": 3.132, + "args": { + "External id": 291926,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589578.261, "dur": 24.386, + "args": { + "External id": 291927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 7459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367589670.654, "dur": 8.826, + "args": { + "External id": 291928,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7460 + } + }, + { + "ph": "s", "id": 6, "pid": 2070547, "tid": 2070547, "ts": 5333367589670.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367589682.668, "dur": 1.451, + "args": { + "External id": 291929,"Sequence number": 1209227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367589715.883, "dur": 18746.188, + "args": { + "External id": 291930,"Sequence number": 1209227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 7462 + } + }, + { + "ph": "s", "id": 5, "pid": 2070547, "tid": 2070547, "ts": 5333367589715.883, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367589751.673, "dur": 35.857, + "args": { + "External id": 291931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367589753.484, "dur": 11.509, + "args": { + "External id": 291932,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589755.520, "dur": 9.013, + "args": { + "External id": 291933,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589767.321, "dur": 19.990, + "args": { + "External id": 291934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589768.875, "dur": 18.055, + "args": { + "External id": 291935,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367589791.387, "dur": 24.154, + "args": { + "External id": 291936,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367589792.456, "dur": 5.047, + "args": { + "External id": 291937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589794.222, "dur": 2.901, + "args": { + "External id": 291938,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589798.140, "dur": 17.228, + "args": { + "External id": 291939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589801.129, "dur": 13.812, + "args": { + "External id": 291940,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070547, "tid": 2070547, + "ts": 5333367589824.007, "dur": 18.315, + "args": { + "External id": 291941,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367589826.867, "dur": 3.764, + "args": { + "External id": 291942,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589831.300, "dur": 10.766, + "args": { + "External id": 291943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589832.150, "dur": 9.510, + "args": { + "External id": 291944,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070547, "tid": 2070547, + "ts": 5333367589849.994, "dur": 29.185, + "args": { + "External id": 291945,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367589885.763, "dur": 66.139, + "args": { + "External id": 291946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367589889.207, "dur": 62.198, + "args": { + "External id": 291947,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589898.713, "dur": 0.903, + "args": { + "External id": 291948,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367589901.948, "dur": 30.142, + "args": { + "External id": 291949,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367589906.701, "dur": 25.185, + "args": { + "External id": 291950,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367589909.721, "dur": 3.100, + "args": { + "External id": 291951,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367589913.763, "dur": 17.803, + "args": { + "External id": 291952,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070547, "tid": 2070547, + "ts": 5333367589957.150, "dur": 12613.967, + "args": { + "External id": 291953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070547, "tid": 2070547, + "ts": 5333367589959.580, "dur": 12610.375, + "args": { + "External id": 291954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367602582.737, "dur": 7.390, + "args": { + "External id": 291955,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367602587.139, "dur": 1.040, + "args": { + "External id": 291956,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367602598.301, "dur": 143.777, + "args": { + "External id": 291957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367602603.446, "dur": 8.433, + "args": { + "External id": 291958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367602606.849, "dur": 3.798, + "args": { + "External id": 291959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367602609.436, "dur": 0.920, + "args": { + "External id": 291960,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367602614.399, "dur": 126.849, + "args": { + "External id": 291961,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367602617.782, "dur": 122.298, + "args": { + "External id": 291962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367602748.153, "dur": 5.472, + "args": { + "External id": 291963,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367602751.220, "dur": 0.934, + "args": { + "External id": 291964,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367602766.020, "dur": 2.610, + "args": { + "External id": 291965,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367602780.924, "dur": 7.907, + "args": { + "External id": 291966,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367602783.888, "dur": 4.669, + "args": { + "External id": 291967,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367602978.179, "dur": 262.807, + "args": { + "External id": 291968,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367602983.539, "dur": 2.525, + "args": { + "External id": 291969,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367602988.318, "dur": 252.160, + "args": { + "External id": 291970,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367602994.134, "dur": 0.740, + "args": { + "External id": 291971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367602997.723, "dur": 27.765, + "args": { + "External id": 291972,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367603028.392, "dur": 6.446, + "args": { + "External id": 291973,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603033.636, "dur": 0.792, + "args": { + "External id": 291974,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367603037.087, "dur": 27.019, + "args": { + "External id": 291975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367603038.246, "dur": 1.253, + "args": { + "External id": 291976,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367603041.962, "dur": 21.810, + "args": { + "External id": 291977,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603046.339, "dur": 3.601, + "args": { + "External id": 291978,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367603066.775, "dur": 25.873, + "args": { + "External id": 291979,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603095.288, "dur": 22.012, + "args": { + "External id": 291980,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367603123.864, "dur": 17.326, + "args": { + "External id": 291981,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603144.095, "dur": 16.981, + "args": { + "External id": 291982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367603163.783, "dur": 40.895, + "args": { + "External id": 291983,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603181.158, "dur": 2.475, + "args": { + "External id": 291984,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603186.876, "dur": 1.260, + "args": { + "External id": 291985,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603207.251, "dur": 14.987, + "args": { + "External id": 291986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603228.018, "dur": 11.104, + "args": { + "External id": 291987,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367603249.824, "dur": 2.778, + "args": { + "External id": 291988,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367603260.194, "dur": 4.757, + "args": { + "External id": 291989,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603263.325, "dur": 0.791, + "args": { + "External id": 291990,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367603359.535, "dur": 79.594, + "args": { + "External id": 291991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367603445.399, "dur": 5.511, + "args": { + "External id": 291992,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603448.677, "dur": 0.847, + "args": { + "External id": 291993,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603452.960, "dur": 32.406, + "args": { + "External id": 291994,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367603494.882, "dur": 6.818, + "args": { + "External id": 291995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367603497.061, "dur": 3.891, + "args": { + "External id": 291996,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603499.067, "dur": 1.632, + "args": { + "External id": 291997,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367603506.334, "dur": 48.577, + "args": { + "External id": 291998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367603507.553, "dur": 46.857, + "args": { + "External id": 291999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603565.306, "dur": 17.121, + "args": { + "External id": 292000,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367603588.515, "dur": 8.177, + "args": { + "External id": 292001,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603594.465, "dur": 0.836, + "args": { + "External id": 292002,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367603601.586, "dur": 90.481, + "args": { + "External id": 292003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367603602.568, "dur": 3.992, + "args": { + "External id": 292004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367603603.226, "dur": 2.541, + "args": { + "External id": 292005,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603604.562, "dur": 1.026, + "args": { + "External id": 292006,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367603607.265, "dur": 84.351, + "args": { + "External id": 292007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367603610.473, "dur": 79.852, + "args": { + "External id": 292008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367603698.696, "dur": 5.560, + "args": { + "External id": 292009,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603701.742, "dur": 0.991, + "args": { + "External id": 292010,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367603711.991, "dur": 1.971, + "args": { + "External id": 292011,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367603722.898, "dur": 12.504, + "args": { + "External id": 292012,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367603727.847, "dur": 7.240, + "args": { + "External id": 292013,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367603838.941, "dur": 190.704, + "args": { + "External id": 292014,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367603841.283, "dur": 2.180, + "args": { + "External id": 292015,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367603845.007, "dur": 184.210, + "args": { + "External id": 292016,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367603847.115, "dur": 0.440, + "args": { + "External id": 292017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367603848.794, "dur": 24.316, + "args": { + "External id": 292018,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367603875.035, "dur": 3.889, + "args": { + "External id": 292019,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603877.378, "dur": 1.298, + "args": { + "External id": 292020,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367603880.425, "dur": 28.417, + "args": { + "External id": 292021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367603881.866, "dur": 1.432, + "args": { + "External id": 292022,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367603887.426, "dur": 21.125, + "args": { + "External id": 292023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603893.663, "dur": 2.896, + "args": { + "External id": 292024,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367603910.524, "dur": 22.698, + "args": { + "External id": 292025,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603934.937, "dur": 13.125, + "args": { + "External id": 292026,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367603951.230, "dur": 13.014, + "args": { + "External id": 292027,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603965.669, "dur": 11.581, + "args": { + "External id": 292028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367603979.698, "dur": 22.870, + "args": { + "External id": 292029,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367603981.685, "dur": 0.995, + "args": { + "External id": 292030,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367603987.955, "dur": 1.194, + "args": { + "External id": 292031,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604004.188, "dur": 11.558, + "args": { + "External id": 292032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604016.878, "dur": 11.288, + "args": { + "External id": 292033,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367604036.721, "dur": 5.010, + "args": { + "External id": 292034,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367604051.560, "dur": 3.932, + "args": { + "External id": 292035,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604054.145, "dur": 0.412, + "args": { + "External id": 292036,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367604124.096, "dur": 74.024, + "args": { + "External id": 292037,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367604205.945, "dur": 8.598, + "args": { + "External id": 292038,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604211.727, "dur": 1.330, + "args": { + "External id": 292039,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604216.086, "dur": 29.672, + "args": { + "External id": 292040,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367604251.450, "dur": 6.197, + "args": { + "External id": 292041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367604253.074, "dur": 3.816, + "args": { + "External id": 292042,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604255.516, "dur": 1.198, + "args": { + "External id": 292043,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367604261.038, "dur": 47.623, + "args": { + "External id": 292044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367604264.122, "dur": 44.030, + "args": { + "External id": 292045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604313.449, "dur": 14.548, + "args": { + "External id": 292046,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367604334.155, "dur": 4.481, + "args": { + "External id": 292047,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604336.706, "dur": 0.730, + "args": { + "External id": 292048,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367604343.452, "dur": 50.084, + "args": { + "External id": 292049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367604344.671, "dur": 6.296, + "args": { + "External id": 292050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367604345.478, "dur": 4.779, + "args": { + "External id": 292051,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604349.404, "dur": 0.701, + "args": { + "External id": 292052,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367604351.694, "dur": 41.463, + "args": { + "External id": 292053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367604352.635, "dur": 39.918, + "args": { + "External id": 292054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367604397.600, "dur": 5.588, + "args": { + "External id": 292055,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604399.396, "dur": 2.723, + "args": { + "External id": 292056,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367604409.477, "dur": 1.852, + "args": { + "External id": 292057,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367604419.612, "dur": 8.414, + "args": { + "External id": 292058,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367604424.087, "dur": 3.633, + "args": { + "External id": 292059,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367604523.359, "dur": 259.104, + "args": { + "External id": 292060,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367604525.951, "dur": 1.984, + "args": { + "External id": 292061,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367604529.409, "dur": 252.582, + "args": { + "External id": 292062,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367604530.568, "dur": 0.295, + "args": { + "External id": 292063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367604532.612, "dur": 26.223, + "args": { + "External id": 292064,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367604560.504, "dur": 3.682, + "args": { + "External id": 292065,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604563.006, "dur": 0.958, + "args": { + "External id": 292066,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367604565.395, "dur": 37.229, + "args": { + "External id": 292067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367604568.316, "dur": 1.360, + "args": { + "External id": 292068,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367604570.918, "dur": 31.402, + "args": { + "External id": 292069,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604576.657, "dur": 2.292, + "args": { + "External id": 292070,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367604604.240, "dur": 58.666, + "args": { + "External id": 292071,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604665.517, "dur": 23.835, + "args": { + "External id": 292072,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367604692.331, "dur": 24.018, + "args": { + "External id": 292073,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604717.942, "dur": 12.081, + "args": { + "External id": 292074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367604732.321, "dur": 23.049, + "args": { + "External id": 292075,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604734.315, "dur": 1.376, + "args": { + "External id": 292076,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604740.104, "dur": 0.771, + "args": { + "External id": 292077,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604756.791, "dur": 11.212, + "args": { + "External id": 292078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604769.247, "dur": 11.591, + "args": { + "External id": 292079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367604791.036, "dur": 2.557, + "args": { + "External id": 292080,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367604804.048, "dur": 4.510, + "args": { + "External id": 292081,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604807.417, "dur": 0.399, + "args": { + "External id": 292082,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367604882.798, "dur": 56.973, + "args": { + "External id": 292083,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367604947.736, "dur": 4.751, + "args": { + "External id": 292084,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604950.199, "dur": 1.077, + "args": { + "External id": 292085,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367604954.133, "dur": 23.456, + "args": { + "External id": 292086,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367604982.255, "dur": 5.819, + "args": { + "External id": 292087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367604984.217, "dur": 3.135, + "args": { + "External id": 292088,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367604986.142, "dur": 0.972, + "args": { + "External id": 292089,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367604993.581, "dur": 39.866, + "args": { + "External id": 292090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367604994.857, "dur": 37.970, + "args": { + "External id": 292091,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605037.902, "dur": 14.084, + "args": { + "External id": 292092,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367605057.231, "dur": 4.809, + "args": { + "External id": 292093,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605059.715, "dur": 1.130, + "args": { + "External id": 292094,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367605068.978, "dur": 52.309, + "args": { + "External id": 292095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367605070.046, "dur": 9.637, + "args": { + "External id": 292096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367605073.324, "dur": 5.828, + "args": { + "External id": 292097,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605077.904, "dur": 1.085, + "args": { + "External id": 292098,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367605080.349, "dur": 40.508, + "args": { + "External id": 292099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367605080.956, "dur": 39.171, + "args": { + "External id": 292100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367605125.060, "dur": 4.054, + "args": { + "External id": 292101,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605127.114, "dur": 0.963, + "args": { + "External id": 292102,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367605136.888, "dur": 1.555, + "args": { + "External id": 292103,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605146.254, "dur": 5.820, + "args": { + "External id": 292104,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605148.677, "dur": 3.143, + "args": { + "External id": 292105,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367605259.756, "dur": 179.631, + "args": { + "External id": 292106,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605262.455, "dur": 2.914, + "args": { + "External id": 292107,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367605266.849, "dur": 172.028, + "args": { + "External id": 292108,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367605271.281, "dur": 0.319, + "args": { + "External id": 292109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367605275.304, "dur": 21.854, + "args": { + "External id": 292110,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367605298.712, "dur": 4.191, + "args": { + "External id": 292111,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605301.616, "dur": 0.935, + "args": { + "External id": 292112,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367605303.817, "dur": 22.133, + "args": { + "External id": 292113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605306.789, "dur": 1.233, + "args": { + "External id": 292114,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367605309.294, "dur": 16.397, + "args": { + "External id": 292115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605311.961, "dur": 2.787, + "args": { + "External id": 292116,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367605327.274, "dur": 21.261, + "args": { + "External id": 292117,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605349.839, "dur": 12.806, + "args": { + "External id": 292118,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367605365.455, "dur": 12.949, + "args": { + "External id": 292119,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605382.104, "dur": 11.173, + "args": { + "External id": 292120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367605395.037, "dur": 18.551, + "args": { + "External id": 292121,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605397.409, "dur": 0.991, + "args": { + "External id": 292122,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605400.235, "dur": 0.893, + "args": { + "External id": 292123,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605414.994, "dur": 10.984, + "args": { + "External id": 292124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605427.175, "dur": 10.838, + "args": { + "External id": 292125,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367605449.007, "dur": 2.165, + "args": { + "External id": 292126,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367605459.867, "dur": 3.677, + "args": { + "External id": 292127,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605462.168, "dur": 0.606, + "args": { + "External id": 292128,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367605526.907, "dur": 52.666, + "args": { + "External id": 292129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367605584.265, "dur": 4.241, + "args": { + "External id": 292130,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605586.781, "dur": 0.730, + "args": { + "External id": 292131,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605589.955, "dur": 22.909, + "args": { + "External id": 292132,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367605617.707, "dur": 47.544, + "args": { + "External id": 292133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367605657.468, "dur": 6.804, + "args": { + "External id": 292134,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605659.903, "dur": 3.928, + "args": { + "External id": 292135,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367605669.183, "dur": 45.109, + "args": { + "External id": 292136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367605670.118, "dur": 43.685, + "args": { + "External id": 292137,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605719.428, "dur": 15.300, + "args": { + "External id": 292138,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367605743.686, "dur": 7.907, + "args": { + "External id": 292139,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605748.833, "dur": 1.224, + "args": { + "External id": 292140,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367605755.972, "dur": 47.764, + "args": { + "External id": 292141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367605756.951, "dur": 4.204, + "args": { + "External id": 292142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367605758.034, "dur": 2.444, + "args": { + "External id": 292143,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605759.265, "dur": 1.066, + "args": { + "External id": 292144,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367605762.096, "dur": 41.281, + "args": { + "External id": 292145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367605762.865, "dur": 39.959, + "args": { + "External id": 292146,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367605807.770, "dur": 3.644, + "args": { + "External id": 292147,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605809.774, "dur": 0.653, + "args": { + "External id": 292148,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367605819.370, "dur": 1.547, + "args": { + "External id": 292149,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605828.933, "dur": 8.370, + "args": { + "External id": 292150,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605830.777, "dur": 6.217, + "args": { + "External id": 292151,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367605927.626, "dur": 189.854, + "args": { + "External id": 292152,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605929.444, "dur": 1.943, + "args": { + "External id": 292153,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367605935.175, "dur": 181.900, + "args": { + "External id": 292154,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367605936.700, "dur": 0.245, + "args": { + "External id": 292155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367605937.867, "dur": 24.052, + "args": { + "External id": 292156,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367605963.819, "dur": 3.607, + "args": { + "External id": 292157,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367605966.285, "dur": 0.741, + "args": { + "External id": 292158,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367605968.252, "dur": 22.797, + "args": { + "External id": 292159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367605969.530, "dur": 1.338, + "args": { + "External id": 292160,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367605972.152, "dur": 18.593, + "args": { + "External id": 292161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367605977.323, "dur": 2.688, + "args": { + "External id": 292162,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367605992.270, "dur": 20.806, + "args": { + "External id": 292163,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606014.465, "dur": 13.177, + "args": { + "External id": 292164,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367606032.823, "dur": 12.425, + "args": { + "External id": 292165,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606046.553, "dur": 11.679, + "args": { + "External id": 292166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367606060.047, "dur": 18.861, + "args": { + "External id": 292167,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606062.373, "dur": 0.967, + "args": { + "External id": 292168,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606065.681, "dur": 1.000, + "args": { + "External id": 292169,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606080.684, "dur": 17.192, + "args": { + "External id": 292170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606101.150, "dur": 14.944, + "args": { + "External id": 292171,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367606124.579, "dur": 1.949, + "args": { + "External id": 292172,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367606136.144, "dur": 3.153, + "args": { + "External id": 292173,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606138.154, "dur": 0.402, + "args": { + "External id": 292174,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367606230.660, "dur": 59.532, + "args": { + "External id": 292175,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367606296.136, "dur": 6.526, + "args": { + "External id": 292176,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606299.626, "dur": 1.658, + "args": { + "External id": 292177,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606304.275, "dur": 25.874, + "args": { + "External id": 292178,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367606337.907, "dur": 5.928, + "args": { + "External id": 292179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367606339.556, "dur": 3.602, + "args": { + "External id": 292180,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606341.966, "dur": 1.008, + "args": { + "External id": 292181,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367606346.750, "dur": 40.559, + "args": { + "External id": 292182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367606347.855, "dur": 38.820, + "args": { + "External id": 292183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606391.486, "dur": 14.656, + "args": { + "External id": 292184,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367606411.545, "dur": 6.790, + "args": { + "External id": 292185,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606416.510, "dur": 0.515, + "args": { + "External id": 292186,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367606422.226, "dur": 51.259, + "args": { + "External id": 292187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367606422.997, "dur": 5.976, + "args": { + "External id": 292188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367606423.716, "dur": 4.748, + "args": { + "External id": 292189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606424.762, "dur": 3.497, + "args": { + "External id": 292190,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367606429.620, "dur": 43.396, + "args": { + "External id": 292191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367606432.372, "dur": 40.120, + "args": { + "External id": 292192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367606477.282, "dur": 3.943, + "args": { + "External id": 292193,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606479.585, "dur": 0.694, + "args": { + "External id": 292194,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367606486.823, "dur": 1.658, + "args": { + "External id": 292195,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367606496.927, "dur": 8.077, + "args": { + "External id": 292196,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367606500.888, "dur": 3.840, + "args": { + "External id": 292197,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367606585.718, "dur": 216.755, + "args": { + "External id": 292198,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367606587.748, "dur": 1.589, + "args": { + "External id": 292199,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367606591.258, "dur": 210.847, + "args": { + "External id": 292200,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367606592.477, "dur": 0.306, + "args": { + "External id": 292201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367606596.963, "dur": 19.730, + "args": { + "External id": 292202,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367606618.509, "dur": 3.321, + "args": { + "External id": 292203,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606620.542, "dur": 0.922, + "args": { + "External id": 292204,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367606659.343, "dur": 29.346, + "args": { + "External id": 292205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367606660.960, "dur": 4.718, + "args": { + "External id": 292206,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367606669.550, "dur": 18.668, + "args": { + "External id": 292207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606672.679, "dur": 2.873, + "args": { + "External id": 292208,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367606689.953, "dur": 20.709, + "args": { + "External id": 292209,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606712.354, "dur": 13.104, + "args": { + "External id": 292210,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367606727.842, "dur": 12.665, + "args": { + "External id": 292211,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606741.834, "dur": 11.820, + "args": { + "External id": 292212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367606755.533, "dur": 20.892, + "args": { + "External id": 292213,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606757.562, "dur": 1.083, + "args": { + "External id": 292214,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606762.286, "dur": 0.657, + "args": { + "External id": 292215,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606777.817, "dur": 11.108, + "args": { + "External id": 292216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606790.212, "dur": 10.624, + "args": { + "External id": 292217,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367606811.098, "dur": 2.498, + "args": { + "External id": 292218,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367606823.383, "dur": 3.590, + "args": { + "External id": 292219,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606825.692, "dur": 0.554, + "args": { + "External id": 292220,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367606897.764, "dur": 56.855, + "args": { + "External id": 292221,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367606959.928, "dur": 7.682, + "args": { + "External id": 292222,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367606965.273, "dur": 1.328, + "args": { + "External id": 292223,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367606969.092, "dur": 26.055, + "args": { + "External id": 292224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367606999.788, "dur": 8.355, + "args": { + "External id": 292225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367607001.373, "dur": 5.742, + "args": { + "External id": 292226,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607003.072, "dur": 3.827, + "args": { + "External id": 292227,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367607010.972, "dur": 41.472, + "args": { + "External id": 292228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367607014.371, "dur": 37.523, + "args": { + "External id": 292229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607056.857, "dur": 13.559, + "args": { + "External id": 292230,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367607076.196, "dur": 4.356, + "args": { + "External id": 292231,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607078.458, "dur": 0.804, + "args": { + "External id": 292232,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367607084.673, "dur": 49.325, + "args": { + "External id": 292233,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367607085.720, "dur": 6.058, + "args": { + "External id": 292234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367607086.493, "dur": 4.591, + "args": { + "External id": 292235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607090.225, "dur": 0.725, + "args": { + "External id": 292236,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367607092.383, "dur": 41.259, + "args": { + "External id": 292237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367607093.055, "dur": 40.012, + "args": { + "External id": 292238,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367607138.014, "dur": 4.038, + "args": { + "External id": 292239,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607140.495, "dur": 0.646, + "args": { + "External id": 292240,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367607147.540, "dur": 1.464, + "args": { + "External id": 292241,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607156.075, "dur": 9.067, + "args": { + "External id": 292242,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607161.129, "dur": 3.741, + "args": { + "External id": 292243,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367607268.717, "dur": 177.389, + "args": { + "External id": 292244,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607270.954, "dur": 3.039, + "args": { + "External id": 292245,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367607278.573, "dur": 167.091, + "args": { + "External id": 292246,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367607280.042, "dur": 0.297, + "args": { + "External id": 292247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367607281.438, "dur": 20.452, + "args": { + "External id": 292248,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367607303.663, "dur": 5.693, + "args": { + "External id": 292249,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607305.595, "dur": 3.297, + "args": { + "External id": 292250,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367607310.441, "dur": 22.407, + "args": { + "External id": 292251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607313.770, "dur": 1.241, + "args": { + "External id": 292252,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367607316.378, "dur": 16.199, + "args": { + "External id": 292253,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607318.871, "dur": 2.254, + "args": { + "External id": 292254,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367607334.169, "dur": 19.309, + "args": { + "External id": 292255,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607354.924, "dur": 12.617, + "args": { + "External id": 292256,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367607370.262, "dur": 12.185, + "args": { + "External id": 292257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607383.669, "dur": 11.024, + "args": { + "External id": 292258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367607396.304, "dur": 22.961, + "args": { + "External id": 292259,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607398.011, "dur": 1.199, + "args": { + "External id": 292260,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607403.467, "dur": 2.817, + "args": { + "External id": 292261,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607420.566, "dur": 11.738, + "args": { + "External id": 292262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607433.215, "dur": 11.440, + "args": { + "External id": 292263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367607452.997, "dur": 1.948, + "args": { + "External id": 292264,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367607463.922, "dur": 3.775, + "args": { + "External id": 292265,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607466.536, "dur": 0.399, + "args": { + "External id": 292266,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367607528.576, "dur": 51.206, + "args": { + "External id": 292267,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367607587.140, "dur": 4.903, + "args": { + "External id": 292268,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607590.088, "dur": 0.879, + "args": { + "External id": 292269,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607593.714, "dur": 25.065, + "args": { + "External id": 292270,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367607658.544, "dur": 7.096, + "args": { + "External id": 292271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367607660.427, "dur": 4.260, + "args": { + "External id": 292272,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607662.631, "dur": 1.696, + "args": { + "External id": 292273,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367607671.918, "dur": 45.598, + "args": { + "External id": 292274,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367607673.152, "dur": 43.787, + "args": { + "External id": 292275,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607721.962, "dur": 14.958, + "args": { + "External id": 292276,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367607742.975, "dur": 4.505, + "args": { + "External id": 292277,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607745.406, "dur": 0.896, + "args": { + "External id": 292278,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367607751.668, "dur": 48.940, + "args": { + "External id": 292279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367607752.428, "dur": 6.784, + "args": { + "External id": 292280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367607755.863, "dur": 2.754, + "args": { + "External id": 292281,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607757.364, "dur": 1.120, + "args": { + "External id": 292282,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367607759.847, "dur": 40.432, + "args": { + "External id": 292283,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367607760.355, "dur": 39.329, + "args": { + "External id": 292284,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367607804.305, "dur": 4.125, + "args": { + "External id": 292285,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607806.605, "dur": 0.743, + "args": { + "External id": 292286,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367607817.070, "dur": 1.700, + "args": { + "External id": 292287,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607826.376, "dur": 5.902, + "args": { + "External id": 292288,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607828.297, "dur": 3.675, + "args": { + "External id": 292289,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367607916.209, "dur": 172.691, + "args": { + "External id": 292290,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607920.596, "dur": 1.562, + "args": { + "External id": 292291,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367607923.627, "dur": 164.615, + "args": { + "External id": 292292,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367607925.129, "dur": 0.647, + "args": { + "External id": 292293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367607929.603, "dur": 19.983, + "args": { + "External id": 292294,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367607951.068, "dur": 4.420, + "args": { + "External id": 292295,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367607954.438, "dur": 0.810, + "args": { + "External id": 292296,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367607956.659, "dur": 19.121, + "args": { + "External id": 292297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367607957.563, "dur": 1.115, + "args": { + "External id": 292298,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367607960.170, "dur": 15.338, + "args": { + "External id": 292299,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607962.837, "dur": 2.540, + "args": { + "External id": 292300,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367607977.393, "dur": 19.333, + "args": { + "External id": 292301,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367607998.357, "dur": 14.409, + "args": { + "External id": 292302,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367608015.373, "dur": 12.547, + "args": { + "External id": 292303,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608031.762, "dur": 11.431, + "args": { + "External id": 292304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367608044.834, "dur": 17.329, + "args": { + "External id": 292305,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608046.968, "dur": 0.979, + "args": { + "External id": 292306,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608049.898, "dur": 0.561, + "args": { + "External id": 292307,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608063.580, "dur": 11.761, + "args": { + "External id": 292308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608076.665, "dur": 10.707, + "args": { + "External id": 292309,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367608098.190, "dur": 1.590, + "args": { + "External id": 292310,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367608107.590, "dur": 3.620, + "args": { + "External id": 292311,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608109.651, "dur": 0.727, + "args": { + "External id": 292312,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367608187.528, "dur": 54.579, + "args": { + "External id": 292313,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367608247.760, "dur": 5.753, + "args": { + "External id": 292314,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608250.935, "dur": 1.318, + "args": { + "External id": 292315,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608254.746, "dur": 24.804, + "args": { + "External id": 292316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367608284.128, "dur": 8.362, + "args": { + "External id": 292317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367608288.022, "dur": 3.579, + "args": { + "External id": 292318,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608289.901, "dur": 1.547, + "args": { + "External id": 292319,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367608295.093, "dur": 37.730, + "args": { + "External id": 292320,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367608296.261, "dur": 35.877, + "args": { + "External id": 292321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608336.768, "dur": 14.039, + "args": { + "External id": 292322,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367608355.883, "dur": 25.805, + "args": { + "External id": 292323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367608358.491, "dur": 22.817, + "args": { + "External id": 292324,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608363.267, "dur": 2.904, + "args": { + "External id": 292325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333367608388.563, "dur": 29.284, + "args": { + "External id": 292326,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070547, "tid": 2070547, + "ts": 5333367608392.980, "dur": 24.482, + "args": { + "External id": 292327,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608397.525, "dur": 3.567, + "args": { + "External id": 292328,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608402.211, "dur": 14.704, + "args": { + "External id": 292329,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2070547, + "ts": 5333367608432.323, "dur": 6.106, + "args": { + "External id": 292330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2070547, + "ts": 5333367608434.371, "dur": 3.762, + "args": { + "External id": 292331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070547, "tid": 2070547, + "ts": 5333367608439.575, "dur": 1.102, + "args": { + "External id": 292332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070547, "tid": 2070547, + "ts": 5333367608440.065, "dur": 0.548, + "args": { + "External id": 292333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367608493.152, "dur": 3.334, + "args": { + "External id": 292334,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32000]", "5", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367608693.311, "dur": 9.093, + "args": { + "External id": 292335,"Sequence number": 1209228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7867 + } + }, + { + "ph": "s", "id": 4, "pid": 2070547, "tid": 2070547, "ts": 5333367608693.311, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367608705.618, "dur": 1.293, + "args": { + "External id": 292336,"Sequence number": 1209229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[131072000, 32000, 1], []], "Input Dims": [[16, 4096, 32000], []], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunction", "pid": 2070547, "tid": 2070547, + "ts": 5333367608740.387, "dur": 8589.270, + "args": { + "External id": 292337,"Sequence number": 1209229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "1.", "8"], "Input type": ["c10::BFloat16", "c10::Half", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [32000, 1], [2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 32000], [32000, 2048], [], [], [], []], "Ev Idx": 7869 + } + }, + { + "ph": "s", "id": 3, "pid": 2070547, "tid": 2070547, "ts": 5333367608740.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367608755.728, "dur": 43.227, + "args": { + "External id": 292338,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367608757.636, "dur": 13.430, + "args": { + "External id": 292339,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608760.812, "dur": 9.693, + "args": { + "External id": 292340,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608775.266, "dur": 23.475, + "args": { + "External id": 292341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608777.871, "dur": 20.390, + "args": { + "External id": 292342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367608802.985, "dur": 22.860, + "args": { + "External id": 292343,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367608803.789, "dur": 4.377, + "args": { + "External id": 292344,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608805.343, "dur": 2.509, + "args": { + "External id": 292345,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608812.832, "dur": 12.821, + "args": { + "External id": 292346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608813.581, "dur": 11.656, + "args": { + "External id": 292347,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070547, "tid": 2070547, + "ts": 5333367608832.515, "dur": 20.745, + "args": { + "External id": 292348,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367608834.236, "dur": 3.592, + "args": { + "External id": 292349,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608838.543, "dur": 14.451, + "args": { + "External id": 292350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367608843.576, "dur": 9.122, + "args": { + "External id": 292351,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367608859.471, "dur": 32.088, + "args": { + "External id": 292352,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608887.993, "dur": 1.596, + "args": { + "External id": 292353,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367608896.477, "dur": 93.116, + "args": { + "External id": 292354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367608897.740, "dur": 7.896, + "args": { + "External id": 292355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367608899.672, "dur": 4.960, + "args": { + "External id": 292356,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608903.225, "dur": 1.161, + "args": { + "External id": 292357,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367608906.822, "dur": 81.860, + "args": { + "External id": 292358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367608908.114, "dur": 79.563, + "args": { + "External id": 292359,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367608993.209, "dur": 3.935, + "args": { + "External id": 292360,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367608995.639, "dur": 0.525, + "args": { + "External id": 292361,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "0"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367609002.101, "dur": 1.917, + "args": { + "External id": 292362,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609013.658, "dur": 5.566, + "args": { + "External id": 292363,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609015.315, "dur": 3.613, + "args": { + "External id": 292364,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367609125.287, "dur": 225.753, + "args": { + "External id": 292365,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609127.463, "dur": 4.388, + "args": { + "External id": 292366,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367609133.421, "dur": 216.991, + "args": { + "External id": 292367,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367609134.951, "dur": 0.370, + "args": { + "External id": 292368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367609139.145, "dur": 23.451, + "args": { + "External id": 292369,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367609164.455, "dur": 24.738, + "args": { + "External id": 292370,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367609187.587, "dur": 0.971, + "args": { + "External id": 292371,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367609190.408, "dur": 27.222, + "args": { + "External id": 292372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609191.913, "dur": 2.333, + "args": { + "External id": 292373,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367609195.675, "dur": 21.629, + "args": { + "External id": 292374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609199.866, "dur": 3.920, + "args": { + "External id": 292375,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367609219.233, "dur": 22.951, + "args": { + "External id": 292376,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609244.318, "dur": 15.544, + "args": { + "External id": 292377,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367609265.159, "dur": 13.817, + "args": { + "External id": 292378,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609282.690, "dur": 15.259, + "args": { + "External id": 292379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367609300.157, "dur": 22.895, + "args": { + "External id": 292380,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609304.894, "dur": 1.306, + "args": { + "External id": 292381,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367609308.487, "dur": 0.945, + "args": { + "External id": 292382,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609324.507, "dur": 11.791, + "args": { + "External id": 292383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609337.737, "dur": 11.238, + "args": { + "External id": 292384,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367609360.922, "dur": 2.120, + "args": { + "External id": 292385,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367609371.299, "dur": 1.597, + "args": { + "External id": 292386,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609379.029, "dur": 3.930, + "args": { + "External id": 292387,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609380.349, "dur": 2.350, + "args": { + "External id": 292388,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367609466.696, "dur": 310.788, + "args": { + "External id": 292389,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609468.218, "dur": 2.343, + "args": { + "External id": 292390,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367609474.748, "dur": 302.326, + "args": { + "External id": 292391,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367609475.800, "dur": 0.268, + "args": { + "External id": 292392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367609477.015, "dur": 18.307, + "args": { + "External id": 292393,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367609496.906, "dur": 3.333, + "args": { + "External id": 292394,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367609499.181, "dur": 0.777, + "args": { + "External id": 292395,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367609500.708, "dur": 19.645, + "args": { + "External id": 292396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367609501.909, "dur": 1.713, + "args": { + "External id": 292397,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367609504.564, "dur": 15.524, + "args": { + "External id": 292398,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609508.953, "dur": 1.231, + "args": { + "External id": 292399,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367609523.377, "dur": 17.024, + "args": { + "External id": 292400,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609541.631, "dur": 34.702, + "args": { + "External id": 292401,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367609578.432, "dur": 40.251, + "args": { + "External id": 292402,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609619.538, "dur": 48.073, + "args": { + "External id": 292403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367609670.274, "dur": 34.097, + "args": { + "External id": 292404,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609672.126, "dur": 1.728, + "args": { + "External id": 292405,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367609675.386, "dur": 1.048, + "args": { + "External id": 292406,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609705.758, "dur": 39.987, + "args": { + "External id": 292407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367609756.251, "dur": 19.710, + "args": { + "External id": 292408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367609786.514, "dur": 1.776, + "args": { + "External id": 292409,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367609799.946, "dur": 30.906, + "args": { + "External id": 292410,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367609803.273, "dur": 8.268, + "args": { + "External id": 292411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367609805.744, "dur": 5.207, + "args": { + "External id": 292412,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367609814.263, "dur": 15.638, + "args": { + "External id": 292413,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367609837.246, "dur": 5.344, + "args": { + "External id": 292414,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367609840.478, "dur": 1.021, + "args": { + "External id": 292415,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367609921.678, "dur": 76.053, + "args": { + "External id": 292416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367610003.193, "dur": 6.198, + "args": { + "External id": 292417,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610007.160, "dur": 0.815, + "args": { + "External id": 292418,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610011.270, "dur": 28.761, + "args": { + "External id": 292419,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367610047.508, "dur": 6.984, + "args": { + "External id": 292420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367610049.605, "dur": 4.162, + "args": { + "External id": 292421,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610051.858, "dur": 1.711, + "args": { + "External id": 292422,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367610057.795, "dur": 43.608, + "args": { + "External id": 292423,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367610059.227, "dur": 41.486, + "args": { + "External id": 292424,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610106.178, "dur": 15.227, + "args": { + "External id": 292425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367610127.346, "dur": 6.692, + "args": { + "External id": 292426,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610132.054, "dur": 0.717, + "args": { + "External id": 292427,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367610138.520, "dur": 69.874, + "args": { + "External id": 292428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367610139.354, "dur": 3.820, + "args": { + "External id": 292429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367610139.981, "dur": 2.558, + "args": { + "External id": 292430,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610141.527, "dur": 0.857, + "args": { + "External id": 292431,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367610143.859, "dur": 64.010, + "args": { + "External id": 292432,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367610146.364, "dur": 60.534, + "args": { + "External id": 292433,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367610214.972, "dur": 5.314, + "args": { + "External id": 292434,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610217.790, "dur": 1.185, + "args": { + "External id": 292435,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "262144000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367610227.142, "dur": 1.986, + "args": { + "External id": 292436,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610239.224, "dur": 6.258, + "args": { + "External id": 292437,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610241.274, "dur": 3.942, + "args": { + "External id": 292438,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367610333.792, "dur": 178.521, + "args": { + "External id": 292439,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610338.274, "dur": 1.936, + "args": { + "External id": 292440,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367610341.697, "dur": 170.202, + "args": { + "External id": 292441,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367610343.317, "dur": 0.528, + "args": { + "External id": 292442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367610344.857, "dur": 21.059, + "args": { + "External id": 292443,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367610369.779, "dur": 5.172, + "args": { + "External id": 292444,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610373.968, "dur": 0.560, + "args": { + "External id": 292445,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367610375.708, "dur": 20.926, + "args": { + "External id": 292446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610376.896, "dur": 1.145, + "args": { + "External id": 292447,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367610379.494, "dur": 16.761, + "args": { + "External id": 292448,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610382.199, "dur": 3.277, + "args": { + "External id": 292449,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367610397.880, "dur": 21.305, + "args": { + "External id": 292450,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610420.663, "dur": 15.020, + "args": { + "External id": 292451,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367610438.117, "dur": 12.562, + "args": { + "External id": 292452,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610452.107, "dur": 11.551, + "args": { + "External id": 292453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367610467.778, "dur": 18.440, + "args": { + "External id": 292454,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610469.835, "dur": 0.900, + "args": { + "External id": 292455,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610472.438, "dur": 0.936, + "args": { + "External id": 292456,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610487.422, "dur": 10.965, + "args": { + "External id": 292457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610499.355, "dur": 11.077, + "args": { + "External id": 292458,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367610518.992, "dur": 4.335, + "args": { + "External id": 292459,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367610535.998, "dur": 1.386, + "args": { + "External id": 292460,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610543.234, "dur": 4.196, + "args": { + "External id": 292461,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610544.575, "dur": 2.619, + "args": { + "External id": 292462,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367610615.508, "dur": 194.923, + "args": { + "External id": 292463,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610616.798, "dur": 2.039, + "args": { + "External id": 292464,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367610658.659, "dur": 151.392, + "args": { + "External id": 292465,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367610660.391, "dur": 0.515, + "args": { + "External id": 292466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367610662.048, "dur": 22.946, + "args": { + "External id": 292467,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367610686.789, "dur": 2.743, + "args": { + "External id": 292468,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610688.501, "dur": 0.706, + "args": { + "External id": 292469,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367610690.252, "dur": 19.241, + "args": { + "External id": 292470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367610691.243, "dur": 1.316, + "args": { + "External id": 292471,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367610693.581, "dur": 15.583, + "args": { + "External id": 292472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610697.632, "dur": 1.583, + "args": { + "External id": 292473,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367610710.431, "dur": 16.769, + "args": { + "External id": 292474,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610728.327, "dur": 11.106, + "args": { + "External id": 292475,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367610743.793, "dur": 10.724, + "args": { + "External id": 292476,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610755.553, "dur": 10.663, + "args": { + "External id": 292477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367610767.401, "dur": 16.786, + "args": { + "External id": 292478,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610768.924, "dur": 1.082, + "args": { + "External id": 292479,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610771.627, "dur": 0.900, + "args": { + "External id": 292480,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610785.221, "dur": 10.742, + "args": { + "External id": 292481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367610799.602, "dur": 9.423, + "args": { + "External id": 292482,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367610817.177, "dur": 1.686, + "args": { + "External id": 292483,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367610827.670, "dur": 26.803, + "args": { + "External id": 292484,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367610830.116, "dur": 6.241, + "args": { + "External id": 292485,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610832.662, "dur": 3.289, + "args": { + "External id": 292486,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367610838.962, "dur": 14.847, + "args": { + "External id": 292487,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367610861.371, "dur": 4.082, + "args": { + "External id": 292488,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367610863.748, "dur": 0.747, + "args": { + "External id": 292489,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367610936.516, "dur": 66.591, + "args": { + "External id": 292490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367611008.207, "dur": 5.388, + "args": { + "External id": 292491,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611011.540, "dur": 0.959, + "args": { + "External id": 292492,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611017.399, "dur": 27.745, + "args": { + "External id": 292493,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367611050.306, "dur": 6.004, + "args": { + "External id": 292494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367611052.061, "dur": 3.498, + "args": { + "External id": 292495,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611054.292, "dur": 1.052, + "args": { + "External id": 292496,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367611059.498, "dur": 41.169, + "args": { + "External id": 292497,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367611060.719, "dur": 39.322, + "args": { + "External id": 292498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611105.077, "dur": 14.420, + "args": { + "External id": 292499,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367611127.704, "dur": 4.549, + "args": { + "External id": 292500,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611130.048, "dur": 0.707, + "args": { + "External id": 292501,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367611136.879, "dur": 72.608, + "args": { + "External id": 292502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367611137.876, "dur": 4.569, + "args": { + "External id": 292503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367611138.854, "dur": 2.960, + "args": { + "External id": 292504,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611141.122, "dur": 0.521, + "args": { + "External id": 292505,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367611145.514, "dur": 63.293, + "args": { + "External id": 292506,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367611146.203, "dur": 61.672, + "args": { + "External id": 292507,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367611215.866, "dur": 9.256, + "args": { + "External id": 292508,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611223.052, "dur": 0.901, + "args": { + "External id": 292509,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "524288000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367611231.631, "dur": 2.154, + "args": { + "External id": 292510,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611243.662, "dur": 6.311, + "args": { + "External id": 292511,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611245.476, "dur": 4.250, + "args": { + "External id": 292512,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367611335.265, "dur": 175.029, + "args": { + "External id": 292513,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611337.111, "dur": 2.235, + "args": { + "External id": 292514,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367611343.580, "dur": 166.340, + "args": { + "External id": 292515,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367611345.095, "dur": 0.398, + "args": { + "External id": 292516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367611346.617, "dur": 20.207, + "args": { + "External id": 292517,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367611368.519, "dur": 5.224, + "args": { + "External id": 292518,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611370.807, "dur": 2.669, + "args": { + "External id": 292519,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367611376.916, "dur": 20.692, + "args": { + "External id": 292520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611378.017, "dur": 1.520, + "args": { + "External id": 292521,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367611381.116, "dur": 16.215, + "args": { + "External id": 292522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611383.574, "dur": 3.026, + "args": { + "External id": 292523,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367611398.739, "dur": 20.381, + "args": { + "External id": 292524,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611420.687, "dur": 12.476, + "args": { + "External id": 292525,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367611435.847, "dur": 12.014, + "args": { + "External id": 292526,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611449.057, "dur": 11.248, + "args": { + "External id": 292527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367611461.956, "dur": 21.983, + "args": { + "External id": 292528,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611466.147, "dur": 1.310, + "args": { + "External id": 292529,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611469.918, "dur": 0.822, + "args": { + "External id": 292530,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611485.782, "dur": 10.979, + "args": { + "External id": 292531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611497.722, "dur": 11.001, + "args": { + "External id": 292532,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367611516.182, "dur": 1.619, + "args": { + "External id": 292533,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367611530.238, "dur": 1.231, + "args": { + "External id": 292534,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611536.733, "dur": 3.745, + "args": { + "External id": 292535,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611538.236, "dur": 1.975, + "args": { + "External id": 292536,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367611608.228, "dur": 200.419, + "args": { + "External id": 292537,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611612.228, "dur": 1.845, + "args": { + "External id": 292538,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367611615.058, "dur": 193.210, + "args": { + "External id": 292539,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367611616.228, "dur": 0.305, + "args": { + "External id": 292540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367611619.641, "dur": 56.234, + "args": { + "External id": 292541,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367611678.356, "dur": 6.078, + "args": { + "External id": 292542,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611683.274, "dur": 0.768, + "args": { + "External id": 292543,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367611685.146, "dur": 19.009, + "args": { + "External id": 292544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367611686.109, "dur": 1.894, + "args": { + "External id": 292545,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367611688.995, "dur": 14.908, + "args": { + "External id": 292546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611691.529, "dur": 1.963, + "args": { + "External id": 292547,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367611705.103, "dur": 16.453, + "args": { + "External id": 292548,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611722.659, "dur": 13.289, + "args": { + "External id": 292549,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367611738.326, "dur": 11.743, + "args": { + "External id": 292550,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611751.204, "dur": 11.106, + "args": { + "External id": 292551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367611765.826, "dur": 16.400, + "args": { + "External id": 292552,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611767.201, "dur": 0.866, + "args": { + "External id": 292553,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611770.056, "dur": 0.896, + "args": { + "External id": 292554,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611783.574, "dur": 10.842, + "args": { + "External id": 292555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367611795.226, "dur": 11.975, + "args": { + "External id": 292556,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367611817.128, "dur": 1.968, + "args": { + "External id": 292557,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367611828.438, "dur": 23.971, + "args": { + "External id": 292558,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367611831.085, "dur": 6.311, + "args": { + "External id": 292559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611833.650, "dur": 3.305, + "args": { + "External id": 292560,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367611839.477, "dur": 12.191, + "args": { + "External id": 292561,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367611859.237, "dur": 4.738, + "args": { + "External id": 292562,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367611862.221, "dur": 0.831, + "args": { + "External id": 292563,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367611938.364, "dur": 68.148, + "args": { + "External id": 292564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367612012.092, "dur": 5.889, + "args": { + "External id": 292565,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612015.541, "dur": 1.250, + "args": { + "External id": 292566,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612020.127, "dur": 23.169, + "args": { + "External id": 292567,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367612048.270, "dur": 8.372, + "args": { + "External id": 292568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367612050.123, "dur": 5.785, + "args": { + "External id": 292569,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612054.388, "dur": 1.310, + "args": { + "External id": 292570,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367612059.834, "dur": 42.850, + "args": { + "External id": 292571,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367612061.342, "dur": 40.845, + "args": { + "External id": 292572,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612107.458, "dur": 14.886, + "args": { + "External id": 292573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367612127.926, "dur": 4.427, + "args": { + "External id": 292574,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612130.281, "dur": 0.754, + "args": { + "External id": 292575,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367612139.679, "dur": 71.115, + "args": { + "External id": 292576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367612140.913, "dur": 6.367, + "args": { + "External id": 292577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367612141.848, "dur": 4.716, + "args": { + "External id": 292578,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612145.648, "dur": 0.776, + "args": { + "External id": 292579,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367612148.181, "dur": 62.095, + "args": { + "External id": 292580,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367612149.041, "dur": 59.973, + "args": { + "External id": 292581,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367612219.431, "dur": 4.686, + "args": { + "External id": 292582,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612221.961, "dur": 0.898, + "args": { + "External id": 292583,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "786432000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367612231.290, "dur": 2.409, + "args": { + "External id": 292584,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612241.962, "dur": 9.014, + "args": { + "External id": 292585,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612246.530, "dur": 4.168, + "args": { + "External id": 292586,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367612339.054, "dur": 179.980, + "args": { + "External id": 292587,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612341.170, "dur": 2.225, + "args": { + "External id": 292588,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367612345.193, "dur": 173.449, + "args": { + "External id": 292589,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367612346.502, "dur": 0.332, + "args": { + "External id": 292590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367612348.748, "dur": 23.082, + "args": { + "External id": 292591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367612373.197, "dur": 3.997, + "args": { + "External id": 292592,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612375.802, "dur": 1.141, + "args": { + "External id": 292593,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367612378.082, "dur": 29.813, + "args": { + "External id": 292594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612379.154, "dur": 5.523, + "args": { + "External id": 292595,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367612388.146, "dur": 19.409, + "args": { + "External id": 292596,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612393.400, "dur": 2.785, + "args": { + "External id": 292597,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367612409.214, "dur": 19.148, + "args": { + "External id": 292598,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612429.765, "dur": 12.398, + "args": { + "External id": 292599,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367612445.078, "dur": 13.004, + "args": { + "External id": 292600,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612459.359, "dur": 11.375, + "args": { + "External id": 292601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367612472.482, "dur": 20.210, + "args": { + "External id": 292602,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612474.454, "dur": 0.996, + "args": { + "External id": 292603,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612479.694, "dur": 0.599, + "args": { + "External id": 292604,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612494.106, "dur": 10.895, + "args": { + "External id": 292605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612505.924, "dur": 11.430, + "args": { + "External id": 292606,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367612525.078, "dur": 1.515, + "args": { + "External id": 292607,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367612537.286, "dur": 0.968, + "args": { + "External id": 292608,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612545.692, "dur": 4.201, + "args": { + "External id": 292609,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612547.405, "dur": 2.219, + "args": { + "External id": 292610,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367612616.377, "dur": 212.050, + "args": { + "External id": 292611,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612618.236, "dur": 1.775, + "args": { + "External id": 292612,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367612660.908, "dur": 167.127, + "args": { + "External id": 292613,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367612663.088, "dur": 0.189, + "args": { + "External id": 292614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367612664.732, "dur": 20.204, + "args": { + "External id": 292615,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367612688.775, "dur": 7.939, + "args": { + "External id": 292616,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612693.569, "dur": 2.848, + "args": { + "External id": 292617,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367612697.481, "dur": 20.892, + "args": { + "External id": 292618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367612698.374, "dur": 1.334, + "args": { + "External id": 292619,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367612700.864, "dur": 17.282, + "args": { + "External id": 292620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612703.156, "dur": 1.840, + "args": { + "External id": 292621,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367612721.601, "dur": 18.033, + "args": { + "External id": 292622,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612740.720, "dur": 14.003, + "args": { + "External id": 292623,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367612756.968, "dur": 11.506, + "args": { + "External id": 292624,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612769.334, "dur": 11.337, + "args": { + "External id": 292625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367612784.063, "dur": 17.140, + "args": { + "External id": 292626,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612785.829, "dur": 1.215, + "args": { + "External id": 292627,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612788.900, "dur": 1.216, + "args": { + "External id": 292628,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612802.884, "dur": 11.537, + "args": { + "External id": 292629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367612815.254, "dur": 11.928, + "args": { + "External id": 292630,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367612835.502, "dur": 1.641, + "args": { + "External id": 292631,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367612846.159, "dur": 23.678, + "args": { + "External id": 292632,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367612848.443, "dur": 5.943, + "args": { + "External id": 292633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612850.667, "dur": 3.347, + "args": { + "External id": 292634,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367612856.259, "dur": 12.898, + "args": { + "External id": 292635,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367612876.399, "dur": 4.524, + "args": { + "External id": 292636,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367612879.068, "dur": 0.858, + "args": { + "External id": 292637,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367612953.437, "dur": 67.999, + "args": { + "External id": 292638,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367613026.940, "dur": 5.167, + "args": { + "External id": 292639,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613029.737, "dur": 1.180, + "args": { + "External id": 292640,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613033.707, "dur": 25.726, + "args": { + "External id": 292641,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367613064.387, "dur": 8.720, + "args": { + "External id": 292642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367613066.173, "dur": 5.954, + "args": { + "External id": 292643,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613070.911, "dur": 1.005, + "args": { + "External id": 292644,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367613076.250, "dur": 40.960, + "args": { + "External id": 292645,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367613077.670, "dur": 39.013, + "args": { + "External id": 292646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613121.717, "dur": 13.956, + "args": { + "External id": 292647,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367613141.154, "dur": 4.585, + "args": { + "External id": 292648,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613143.533, "dur": 0.848, + "args": { + "External id": 292649,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367613150.261, "dur": 74.442, + "args": { + "External id": 292650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367613153.626, "dur": 6.286, + "args": { + "External id": 292651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367613154.497, "dur": 4.781, + "args": { + "External id": 292652,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613158.554, "dur": 0.571, + "args": { + "External id": 292653,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367613160.774, "dur": 63.378, + "args": { + "External id": 292654,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367613161.568, "dur": 61.566, + "args": { + "External id": 292655,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367613231.697, "dur": 4.636, + "args": { + "External id": 292656,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613234.252, "dur": 0.958, + "args": { + "External id": 292657,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1048576000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367613245.114, "dur": 2.044, + "args": { + "External id": 292658,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613254.693, "dur": 6.443, + "args": { + "External id": 292659,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613257.191, "dur": 3.703, + "args": { + "External id": 292660,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367613342.985, "dur": 171.998, + "args": { + "External id": 292661,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613345.532, "dur": 2.127, + "args": { + "External id": 292662,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367613351.406, "dur": 163.149, + "args": { + "External id": 292663,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367613352.886, "dur": 0.397, + "args": { + "External id": 292664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367613354.581, "dur": 22.459, + "args": { + "External id": 292665,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367613378.559, "dur": 3.130, + "args": { + "External id": 292666,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613380.716, "dur": 0.670, + "args": { + "External id": 292667,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367613382.969, "dur": 20.590, + "args": { + "External id": 292668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613383.864, "dur": 1.251, + "args": { + "External id": 292669,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367613386.340, "dur": 16.950, + "args": { + "External id": 292670,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613390.646, "dur": 2.151, + "args": { + "External id": 292671,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367613404.972, "dur": 18.785, + "args": { + "External id": 292672,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613424.963, "dur": 12.764, + "args": { + "External id": 292673,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367613442.357, "dur": 12.080, + "args": { + "External id": 292674,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613455.966, "dur": 11.402, + "args": { + "External id": 292675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367613468.932, "dur": 19.253, + "args": { + "External id": 292676,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613470.944, "dur": 1.626, + "args": { + "External id": 292677,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613474.624, "dur": 0.791, + "args": { + "External id": 292678,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613489.477, "dur": 10.826, + "args": { + "External id": 292679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613503.748, "dur": 9.653, + "args": { + "External id": 292680,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367613521.129, "dur": 1.456, + "args": { + "External id": 292681,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367613532.302, "dur": 1.111, + "args": { + "External id": 292682,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613538.400, "dur": 4.094, + "args": { + "External id": 292683,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613540.113, "dur": 2.137, + "args": { + "External id": 292684,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367613609.782, "dur": 194.927, + "args": { + "External id": 292685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613611.503, "dur": 2.059, + "args": { + "External id": 292686,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367613617.170, "dur": 187.241, + "args": { + "External id": 292687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367613618.111, "dur": 0.155, + "args": { + "External id": 292688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367613619.244, "dur": 53.848, + "args": { + "External id": 292689,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367613675.611, "dur": 5.566, + "args": { + "External id": 292690,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613677.544, "dur": 3.291, + "args": { + "External id": 292691,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367613681.870, "dur": 21.568, + "args": { + "External id": 292692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367613682.836, "dur": 1.735, + "args": { + "External id": 292693,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367613685.567, "dur": 17.621, + "args": { + "External id": 292694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613690.324, "dur": 2.077, + "args": { + "External id": 292695,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367613704.562, "dur": 16.886, + "args": { + "External id": 292696,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613722.717, "dur": 10.842, + "args": { + "External id": 292697,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367613736.062, "dur": 11.501, + "args": { + "External id": 292698,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613748.758, "dur": 10.777, + "args": { + "External id": 292699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367613761.017, "dur": 16.981, + "args": { + "External id": 292700,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613762.940, "dur": 0.852, + "args": { + "External id": 292701,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613765.830, "dur": 0.736, + "args": { + "External id": 292702,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613781.397, "dur": 9.869, + "args": { + "External id": 292703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367613792.098, "dur": 11.315, + "args": { + "External id": 292704,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367613811.308, "dur": 1.665, + "args": { + "External id": 292705,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367613821.435, "dur": 26.564, + "args": { + "External id": 292706,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367613824.198, "dur": 8.641, + "args": { + "External id": 292707,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613826.581, "dur": 5.806, + "args": { + "External id": 292708,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367613835.176, "dur": 12.165, + "args": { + "External id": 292709,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367613854.591, "dur": 4.300, + "args": { + "External id": 292710,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613857.174, "dur": 0.826, + "args": { + "External id": 292711,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367613926.331, "dur": 62.466, + "args": { + "External id": 292712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367613994.250, "dur": 4.879, + "args": { + "External id": 292713,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367613997.092, "dur": 0.949, + "args": { + "External id": 292714,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614003.151, "dur": 23.486, + "args": { + "External id": 292715,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367614031.020, "dur": 6.207, + "args": { + "External id": 292716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367614032.912, "dur": 3.582, + "args": { + "External id": 292717,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614035.321, "dur": 0.939, + "args": { + "External id": 292718,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367614040.247, "dur": 41.167, + "args": { + "External id": 292719,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367614041.347, "dur": 39.377, + "args": { + "External id": 292720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614085.919, "dur": 14.154, + "args": { + "External id": 292721,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367614107.646, "dur": 4.636, + "args": { + "External id": 292722,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614110.381, "dur": 0.775, + "args": { + "External id": 292723,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367614116.445, "dur": 66.423, + "args": { + "External id": 292724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367614117.742, "dur": 3.977, + "args": { + "External id": 292725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367614118.548, "dur": 2.547, + "args": { + "External id": 292726,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614120.300, "dur": 0.662, + "args": { + "External id": 292727,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367614124.684, "dur": 57.240, + "args": { + "External id": 292728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367614125.418, "dur": 40.066, + "args": { + "External id": 292729,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367614189.183, "dur": 4.858, + "args": { + "External id": 292730,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614191.940, "dur": 0.849, + "args": { + "External id": 292731,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1310720000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367614200.377, "dur": 1.816, + "args": { + "External id": 292732,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614210.842, "dur": 6.197, + "args": { + "External id": 292733,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614212.819, "dur": 3.896, + "args": { + "External id": 292734,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367614296.518, "dur": 176.052, + "args": { + "External id": 292735,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614301.021, "dur": 1.973, + "args": { + "External id": 292736,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367614304.560, "dur": 167.637, + "args": { + "External id": 292737,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367614306.181, "dur": 0.354, + "args": { + "External id": 292738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367614310.178, "dur": 19.526, + "args": { + "External id": 292739,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367614331.281, "dur": 5.113, + "args": { + "External id": 292740,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614335.093, "dur": 0.933, + "args": { + "External id": 292741,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367614337.072, "dur": 20.079, + "args": { + "External id": 292742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614338.196, "dur": 1.187, + "args": { + "External id": 292743,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367614340.554, "dur": 16.206, + "args": { + "External id": 292744,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614343.274, "dur": 2.584, + "args": { + "External id": 292745,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367614358.313, "dur": 18.377, + "args": { + "External id": 292746,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614377.871, "dur": 12.835, + "args": { + "External id": 292747,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367614393.018, "dur": 12.352, + "args": { + "External id": 292748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614408.500, "dur": 14.280, + "args": { + "External id": 292749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367614424.466, "dur": 21.682, + "args": { + "External id": 292750,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614428.695, "dur": 0.926, + "args": { + "External id": 292751,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614431.915, "dur": 0.651, + "args": { + "External id": 292752,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614447.797, "dur": 10.897, + "args": { + "External id": 292753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614459.657, "dur": 11.567, + "args": { + "External id": 292754,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367614480.369, "dur": 1.504, + "args": { + "External id": 292755,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367614490.604, "dur": 1.034, + "args": { + "External id": 292756,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614496.721, "dur": 3.891, + "args": { + "External id": 292757,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614498.190, "dur": 2.135, + "args": { + "External id": 292758,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367614567.577, "dur": 350.448, + "args": { + "External id": 292759,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614569.091, "dur": 2.239, + "args": { + "External id": 292760,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367614572.979, "dur": 344.661, + "args": { + "External id": 292761,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367614573.965, "dur": 0.340, + "args": { + "External id": 292762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367614575.507, "dur": 17.857, + "args": { + "External id": 292763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367614594.541, "dur": 3.161, + "args": { + "External id": 292764,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614596.623, "dur": 0.874, + "args": { + "External id": 292765,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367614598.485, "dur": 20.762, + "args": { + "External id": 292766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367614599.453, "dur": 1.431, + "args": { + "External id": 292767,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367614601.956, "dur": 17.021, + "args": { + "External id": 292768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614608.273, "dur": 1.322, + "args": { + "External id": 292769,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367614620.289, "dur": 59.163, + "args": { + "External id": 292770,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614681.717, "dur": 30.531, + "args": { + "External id": 292771,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367614714.539, "dur": 44.746, + "args": { + "External id": 292772,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614760.291, "dur": 44.144, + "args": { + "External id": 292773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367614806.126, "dur": 43.022, + "args": { + "External id": 292774,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614807.829, "dur": 1.133, + "args": { + "External id": 292775,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614811.006, "dur": 1.062, + "args": { + "External id": 292776,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614852.260, "dur": 32.249, + "args": { + "External id": 292777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367614885.479, "dur": 30.882, + "args": { + "External id": 292778,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367614924.362, "dur": 2.033, + "args": { + "External id": 292779,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367614934.995, "dur": 25.800, + "args": { + "External id": 292780,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367614937.275, "dur": 6.796, + "args": { + "External id": 292781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614939.752, "dur": 3.963, + "args": { + "External id": 292782,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367614945.760, "dur": 14.412, + "args": { + "External id": 292783,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367614967.446, "dur": 4.750, + "args": { + "External id": 292784,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367614970.451, "dur": 0.773, + "args": { + "External id": 292785,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367615040.647, "dur": 67.023, + "args": { + "External id": 292786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367615113.260, "dur": 7.497, + "args": { + "External id": 292787,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615116.256, "dur": 3.035, + "args": { + "External id": 292788,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615124.759, "dur": 24.607, + "args": { + "External id": 292789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367615154.543, "dur": 6.348, + "args": { + "External id": 292790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367615156.563, "dur": 3.474, + "args": { + "External id": 292791,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615158.706, "dur": 1.116, + "args": { + "External id": 292792,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367615163.758, "dur": 64.612, + "args": { + "External id": 292793,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367615165.058, "dur": 62.410, + "args": { + "External id": 292794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615234.508, "dur": 16.662, + "args": { + "External id": 292795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367615260.000, "dur": 5.534, + "args": { + "External id": 292796,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615263.060, "dur": 0.948, + "args": { + "External id": 292797,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367615269.884, "dur": 50.501, + "args": { + "External id": 292798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367615271.024, "dur": 3.964, + "args": { + "External id": 292799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367615271.928, "dur": 2.433, + "args": { + "External id": 292800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615273.370, "dur": 0.836, + "args": { + "External id": 292801,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367615277.804, "dur": 42.143, + "args": { + "External id": 292802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367615278.343, "dur": 41.027, + "args": { + "External id": 292803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367615324.322, "dur": 3.783, + "args": { + "External id": 292804,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615326.349, "dur": 0.874, + "args": { + "External id": 292805,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1572864000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367615334.778, "dur": 1.910, + "args": { + "External id": 292806,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615344.559, "dur": 8.947, + "args": { + "External id": 292807,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615349.467, "dur": 3.758, + "args": { + "External id": 292808,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367615437.210, "dur": 169.314, + "args": { + "External id": 292809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615439.688, "dur": 1.896, + "args": { + "External id": 292810,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367615442.981, "dur": 163.071, + "args": { + "External id": 292811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367615446.988, "dur": 0.385, + "args": { + "External id": 292812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367615448.462, "dur": 19.593, + "args": { + "External id": 292813,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367615469.622, "dur": 3.811, + "args": { + "External id": 292814,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615472.098, "dur": 1.073, + "args": { + "External id": 292815,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367615476.463, "dur": 21.360, + "args": { + "External id": 292816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615479.612, "dur": 1.267, + "args": { + "External id": 292817,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367615482.037, "dur": 15.548, + "args": { + "External id": 292818,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615484.605, "dur": 2.655, + "args": { + "External id": 292819,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367615499.155, "dur": 17.984, + "args": { + "External id": 292820,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615518.529, "dur": 12.364, + "args": { + "External id": 292821,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367615533.231, "dur": 12.532, + "args": { + "External id": 292822,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615547.131, "dur": 11.403, + "args": { + "External id": 292823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367615560.281, "dur": 20.078, + "args": { + "External id": 292824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615564.137, "dur": 1.234, + "args": { + "External id": 292825,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615567.440, "dur": 0.957, + "args": { + "External id": 292826,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615581.839, "dur": 10.962, + "args": { + "External id": 292827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615594.230, "dur": 10.540, + "args": { + "External id": 292828,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367615613.110, "dur": 1.594, + "args": { + "External id": 292829,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367615660.305, "dur": 1.895, + "args": { + "External id": 292830,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615671.002, "dur": 5.757, + "args": { + "External id": 292831,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615672.937, "dur": 3.563, + "args": { + "External id": 292832,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367615748.026, "dur": 155.592, + "args": { + "External id": 292833,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615752.341, "dur": 1.977, + "args": { + "External id": 292834,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367615755.887, "dur": 147.379, + "args": { + "External id": 292835,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367615757.312, "dur": 0.190, + "args": { + "External id": 292836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367615758.741, "dur": 17.021, + "args": { + "External id": 292837,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367615779.316, "dur": 5.482, + "args": { + "External id": 292838,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615783.608, "dur": 0.964, + "args": { + "External id": 292839,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367615785.539, "dur": 17.611, + "args": { + "External id": 292840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367615786.391, "dur": 1.181, + "args": { + "External id": 292841,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367615788.745, "dur": 14.099, + "args": { + "External id": 292842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615790.632, "dur": 1.890, + "args": { + "External id": 292843,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367615804.072, "dur": 15.497, + "args": { + "External id": 292844,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615820.343, "dur": 13.099, + "args": { + "External id": 292845,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367615835.728, "dur": 11.241, + "args": { + "External id": 292846,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615847.729, "dur": 10.942, + "args": { + "External id": 292847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367615862.019, "dur": 15.964, + "args": { + "External id": 292848,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615863.233, "dur": 0.923, + "args": { + "External id": 292849,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615865.900, "dur": 0.877, + "args": { + "External id": 292850,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615879.106, "dur": 10.807, + "args": { + "External id": 292851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367615890.947, "dur": 11.162, + "args": { + "External id": 292852,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367615908.750, "dur": 1.306, + "args": { + "External id": 292853,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367615920.303, "dur": 27.466, + "args": { + "External id": 292854,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367615922.278, "dur": 8.891, + "args": { + "External id": 292855,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615927.053, "dur": 3.762, + "args": { + "External id": 292856,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367615932.747, "dur": 14.327, + "args": { + "External id": 292857,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367615954.748, "dur": 5.326, + "args": { + "External id": 292858,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367615957.960, "dur": 1.154, + "args": { + "External id": 292859,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367616021.726, "dur": 62.226, + "args": { + "External id": 292860,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367616088.719, "dur": 5.096, + "args": { + "External id": 292861,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616091.645, "dur": 1.000, + "args": { + "External id": 292862,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616095.563, "dur": 23.547, + "args": { + "External id": 292863,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367616123.848, "dur": 34.385, + "args": { + "External id": 292864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367616150.468, "dur": 6.794, + "args": { + "External id": 292865,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616156.151, "dur": 0.934, + "args": { + "External id": 292866,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367616161.127, "dur": 64.924, + "args": { + "External id": 292867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367616162.035, "dur": 62.960, + "args": { + "External id": 292868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616231.984, "dur": 16.033, + "args": { + "External id": 292869,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367616254.377, "dur": 5.291, + "args": { + "External id": 292870,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616257.328, "dur": 0.840, + "args": { + "External id": 292871,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070547, "tid": 2070547, + "ts": 5333367616263.947, "dur": 54.244, + "args": { + "External id": 292872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367616266.960, "dur": 6.780, + "args": { + "External id": 292873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367616267.710, "dur": 5.254, + "args": { + "External id": 292874,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616272.088, "dur": 0.743, + "args": { + "External id": 292875,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367616274.360, "dur": 43.364, + "args": { + "External id": 292876,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367616274.933, "dur": 42.254, + "args": { + "External id": 292877,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367616322.268, "dur": 5.855, + "args": { + "External id": 292878,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616326.645, "dur": 0.427, + "args": { + "External id": 292879,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1835008000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367616335.429, "dur": 2.043, + "args": { + "External id": 292880,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616344.583, "dur": 6.299, + "args": { + "External id": 292881,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616346.715, "dur": 3.875, + "args": { + "External id": 292882,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367616441.182, "dur": 170.885, + "args": { + "External id": 292883,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616443.629, "dur": 1.662, + "args": { + "External id": 292884,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367616446.631, "dur": 165.044, + "args": { + "External id": 292885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367616447.973, "dur": 0.309, + "args": { + "External id": 292886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367616452.275, "dur": 21.518, + "args": { + "External id": 292887,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367616475.363, "dur": 2.928, + "args": { + "External id": 292888,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616477.443, "dur": 0.569, + "args": { + "External id": 292889,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367616479.008, "dur": 24.412, + "args": { + "External id": 292890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616481.462, "dur": 1.144, + "args": { + "External id": 292891,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367616483.841, "dur": 19.250, + "args": { + "External id": 292892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616489.185, "dur": 2.571, + "args": { + "External id": 292893,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367616504.725, "dur": 19.261, + "args": { + "External id": 292894,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616525.359, "dur": 12.312, + "args": { + "External id": 292895,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367616539.733, "dur": 12.600, + "args": { + "External id": 292896,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616553.595, "dur": 11.315, + "args": { + "External id": 292897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367616566.542, "dur": 19.741, + "args": { + "External id": 292898,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616568.087, "dur": 0.956, + "args": { + "External id": 292899,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616572.566, "dur": 1.173, + "args": { + "External id": 292900,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616587.392, "dur": 10.856, + "args": { + "External id": 292901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616599.485, "dur": 11.070, + "args": { + "External id": 292902,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367616618.201, "dur": 1.525, + "args": { + "External id": 292903,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367616670.349, "dur": 1.757, + "args": { + "External id": 292904,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616680.264, "dur": 5.261, + "args": { + "External id": 292905,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616681.741, "dur": 3.541, + "args": { + "External id": 292906,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367616758.043, "dur": 154.144, + "args": { + "External id": 292907,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616759.932, "dur": 1.986, + "args": { + "External id": 292908,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070547, "tid": 2070547, + "ts": 5333367616765.423, "dur": 146.326, + "args": { + "External id": 292909,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070547, "tid": 2070547, + "ts": 5333367616766.474, "dur": 0.241, + "args": { + "External id": 292910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070547, "tid": 2070547, + "ts": 5333367616769.631, "dur": 17.723, + "args": { + "External id": 292911,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070547, "tid": 2070547, + "ts": 5333367616788.475, "dur": 5.038, + "args": { + "External id": 292912,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616790.484, "dur": 2.583, + "args": { + "External id": 292913,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367616793.980, "dur": 15.722, + "args": { + "External id": 292914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333367616794.732, "dur": 1.035, + "args": { + "External id": 292915,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333367616796.651, "dur": 12.779, + "args": { + "External id": 292916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616798.509, "dur": 1.383, + "args": { + "External id": 292917,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333367616810.695, "dur": 15.473, + "args": { + "External id": 292918,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616827.097, "dur": 12.129, + "args": { + "External id": 292919,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070547, "tid": 2070547, + "ts": 5333367616841.036, "dur": 11.178, + "args": { + "External id": 292920,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616853.028, "dur": 11.633, + "args": { + "External id": 292921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367616867.847, "dur": 17.957, + "args": { + "External id": 292922,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616869.143, "dur": 1.009, + "args": { + "External id": 292923,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616871.969, "dur": 2.545, + "args": { + "External id": 292924,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616886.798, "dur": 11.091, + "args": { + "External id": 292925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367616898.733, "dur": 12.012, + "args": { + "External id": 292926,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333367616917.313, "dur": 1.217, + "args": { + "External id": 292927,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367616926.408, "dur": 23.056, + "args": { + "External id": 292928,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367616928.623, "dur": 6.180, + "args": { + "External id": 292929,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616930.917, "dur": 3.514, + "args": { + "External id": 292930,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070547, "tid": 2070547, + "ts": 5333367616936.277, "dur": 12.400, + "args": { + "External id": 292931,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367616957.925, "dur": 4.477, + "args": { + "External id": 292932,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367616960.671, "dur": 0.869, + "args": { + "External id": 292933,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367617024.626, "dur": 62.934, + "args": { + "External id": 292934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070547, "tid": 2070547, + "ts": 5333367617092.520, "dur": 4.599, + "args": { + "External id": 292935,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367617095.194, "dur": 1.056, + "args": { + "External id": 292936,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333367617098.813, "dur": 23.724, + "args": { + "External id": 292937,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070547, "tid": 2070547, + "ts": 5333367617127.504, "dur": 8.219, + "args": { + "External id": 292938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070547, "tid": 2070547, + "ts": 5333367617131.853, "dur": 3.192, + "args": { + "External id": 292939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367617133.749, "dur": 1.088, + "args": { + "External id": 292940,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070547, "tid": 2070547, + "ts": 5333367617138.472, "dur": 59.328, + "args": { + "External id": 292941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070547, "tid": 2070547, + "ts": 5333367617139.633, "dur": 57.042, + "args": { + "External id": 292942,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070547, "tid": 2070547, + "ts": 5333367617203.696, "dur": 16.382, + "args": { + "External id": 292943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367617225.801, "dur": 39.145, + "args": { + "External id": 292944,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070547, "tid": 2070547, + "ts": 5333367617228.511, "dur": 35.903, + "args": { + "External id": 292945,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367617236.414, "dur": 1.380, + "args": { + "External id": 292946,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070547, "tid": 2070547, + "ts": 5333367617275.819, "dur": 20.786, + "args": { + "External id": 292947,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070547, "tid": 2070547, + "ts": 5333367617347.846, "dur": 18.835, + "args": { + "External id": 292948,"Sequence number": 1209230, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8480 + } + }, + { + "ph": "s", "id": 2, "pid": 2070547, "tid": 2070547, "ts": 5333367617347.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2070547, "tid": 2070547, + "ts": 5333367617479.628, "dur": 41.905, + "args": { + "External id": 292949,"Record function id": 0, "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070547, "tid": 2070547, + "ts": 5333367617671.801, "dur": 34.275, + "args": { + "External id": 292950,"Sequence number": 1209231, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8482 + } + }, + { + "ph": "s", "id": 1, "pid": 2070547, "tid": 2070547, "ts": 5333367617671.801, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367617763.533, "dur": 26.607, + "args": { + "External id": 292951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333367617764.956, "dur": 9.339, + "args": { + "External id": 292952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333367617769.109, "dur": 4.525, + "args": { + "External id": 292953,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333367617775.949, "dur": 13.858, + "args": { + "External id": 292954,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070547, "tid": 2070547, + "ts": 5333368983948.587, "dur": 56.699, + "args": { + "External id": 292955,"Sequence number": 1209232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070547, "tid": 2070547, + "ts": 5333368984014.423, "dur": 18.052, + "args": { + "External id": 292956,"Sequence number": 1209233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070547, "tid": 2070547, + "ts": 5333368984041.869, "dur": 23.395, + "args": { + "External id": 292957,"Sequence number": 1209234, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070547, "tid": 2070547, + "ts": 5333368984413.738, "dur": 24.087, + "args": { + "External id": 292958,"Sequence number": 1209235, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070547, "tid": 2070547, + "ts": 5333368984443.746, "dur": 140.702, + "args": { + "External id": 292959,"Sequence number": 1209236, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070547, "tid": 2070547, + "ts": 5333368986736.170, "dur": 3093.197, + "args": { + "External id": 292960,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070547, "tid": 2070547, + "ts": 5333368987363.286, "dur": 963.429, + "args": { + "External id": 292961,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070547, "tid": 2070547, + "ts": 5333368987382.615, "dur": 70.432, + "args": { + "External id": 292962,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333368987386.774, "dur": 13.084, + "args": { + "External id": 292963,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070547, "tid": 2070547, + "ts": 5333368987403.301, "dur": 49.432, + "args": { + "External id": 292964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[36500]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070547, "tid": 2070547, + "ts": 5333368987405.370, "dur": 46.597, + "args": { + "External id": 292965,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[36500], []], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989862.244, "dur": 3.426, + "args": { + "External id": 292966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989867.855, "dur": 0.556, + "args": { + "External id": 292967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989869.630, "dur": 0.456, + "args": { + "External id": 292968,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989871.374, "dur": 0.460, + "args": { + "External id": 292969,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989872.951, "dur": 0.434, + "args": { + "External id": 292970,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989874.607, "dur": 0.533, + "args": { + "External id": 292971,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989876.197, "dur": 0.591, + "args": { + "External id": 292972,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989879.701, "dur": 0.239, + "args": { + "External id": 292973,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989880.842, "dur": 0.451, + "args": { + "External id": 292974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989882.194, "dur": 0.404, + "args": { + "External id": 292975,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989883.515, "dur": 0.425, + "args": { + "External id": 292976,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989884.690, "dur": 0.444, + "args": { + "External id": 292977,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989886.017, "dur": 0.429, + "args": { + "External id": 292978,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989887.228, "dur": 0.239, + "args": { + "External id": 292979,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989888.210, "dur": 0.489, + "args": { + "External id": 292980,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989891.508, "dur": 0.213, + "args": { + "External id": 292981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989892.423, "dur": 0.203, + "args": { + "External id": 292982,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989893.696, "dur": 0.201, + "args": { + "External id": 292983,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989894.630, "dur": 0.200, + "args": { + "External id": 292984,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989895.779, "dur": 0.214, + "args": { + "External id": 292985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989896.734, "dur": 0.210, + "args": { + "External id": 292986,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989897.709, "dur": 0.206, + "args": { + "External id": 292987,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989898.682, "dur": 0.257, + "args": { + "External id": 292988,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989901.668, "dur": 0.238, + "args": { + "External id": 292989,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989902.600, "dur": 0.210, + "args": { + "External id": 292990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989903.621, "dur": 0.204, + "args": { + "External id": 292991,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989904.514, "dur": 0.216, + "args": { + "External id": 292992,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989905.442, "dur": 0.217, + "args": { + "External id": 292993,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989906.649, "dur": 0.208, + "args": { + "External id": 292994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989907.841, "dur": 0.212, + "args": { + "External id": 292995,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989908.794, "dur": 0.211, + "args": { + "External id": 292996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989911.365, "dur": 0.358, + "args": { + "External id": 292997,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989912.662, "dur": 0.178, + "args": { + "External id": 292998,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989914.043, "dur": 0.251, + "args": { + "External id": 292999,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989915.165, "dur": 0.221, + "args": { + "External id": 293000,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989916.229, "dur": 0.207, + "args": { + "External id": 293001,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989917.303, "dur": 0.349, + "args": { + "External id": 293002,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989918.356, "dur": 0.209, + "args": { + "External id": 293003,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989919.332, "dur": 0.326, + "args": { + "External id": 293004,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989922.412, "dur": 0.204, + "args": { + "External id": 293005,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989923.405, "dur": 0.211, + "args": { + "External id": 293006,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989924.511, "dur": 0.210, + "args": { + "External id": 293007,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989925.538, "dur": 0.214, + "args": { + "External id": 293008,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989926.610, "dur": 0.201, + "args": { + "External id": 293009,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989927.610, "dur": 0.208, + "args": { + "External id": 293010,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989928.691, "dur": 0.182, + "args": { + "External id": 293011,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989929.681, "dur": 0.209, + "args": { + "External id": 293012,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989932.692, "dur": 0.210, + "args": { + "External id": 293013,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989933.641, "dur": 0.198, + "args": { + "External id": 293014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989934.638, "dur": 0.238, + "args": { + "External id": 293015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989935.805, "dur": 0.206, + "args": { + "External id": 293016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989936.757, "dur": 0.213, + "args": { + "External id": 293017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989937.677, "dur": 0.212, + "args": { + "External id": 293018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989938.808, "dur": 0.209, + "args": { + "External id": 293019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989939.800, "dur": 0.259, + "args": { + "External id": 293020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989942.648, "dur": 0.308, + "args": { + "External id": 293021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989943.646, "dur": 0.245, + "args": { + "External id": 293022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989944.638, "dur": 0.224, + "args": { + "External id": 293023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989945.613, "dur": 0.212, + "args": { + "External id": 293024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989946.554, "dur": 0.394, + "args": { + "External id": 293025,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989947.681, "dur": 0.408, + "args": { + "External id": 293026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989948.787, "dur": 0.380, + "args": { + "External id": 293027,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989949.878, "dur": 0.371, + "args": { + "External id": 293028,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989952.898, "dur": 0.209, + "args": { + "External id": 293029,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989953.812, "dur": 0.209, + "args": { + "External id": 293030,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989954.961, "dur": 0.211, + "args": { + "External id": 293031,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989955.942, "dur": 0.186, + "args": { + "External id": 293032,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989956.908, "dur": 0.217, + "args": { + "External id": 293033,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989957.821, "dur": 0.205, + "args": { + "External id": 293034,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989958.742, "dur": 0.213, + "args": { + "External id": 293035,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989959.757, "dur": 0.203, + "args": { + "External id": 293036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989962.459, "dur": 0.206, + "args": { + "External id": 293037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989963.404, "dur": 0.202, + "args": { + "External id": 293038,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989964.367, "dur": 0.205, + "args": { + "External id": 293039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989965.359, "dur": 0.206, + "args": { + "External id": 293040,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989966.405, "dur": 0.189, + "args": { + "External id": 293041,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989967.305, "dur": 0.211, + "args": { + "External id": 293042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989969.265, "dur": 0.216, + "args": { + "External id": 293043,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989970.263, "dur": 0.217, + "args": { + "External id": 293044,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989973.421, "dur": 0.213, + "args": { + "External id": 293045,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989974.353, "dur": 0.214, + "args": { + "External id": 293046,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989975.522, "dur": 0.207, + "args": { + "External id": 293047,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989976.432, "dur": 0.210, + "args": { + "External id": 293048,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989977.352, "dur": 0.210, + "args": { + "External id": 293049,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989978.394, "dur": 0.209, + "args": { + "External id": 293050,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989983.172, "dur": 0.282, + "args": { + "External id": 293051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989984.485, "dur": 0.211, + "args": { + "External id": 293052,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989987.367, "dur": 0.203, + "args": { + "External id": 293053,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989988.409, "dur": 0.214, + "args": { + "External id": 293054,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989989.412, "dur": 0.200, + "args": { + "External id": 293055,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989990.374, "dur": 0.197, + "args": { + "External id": 293056,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989991.298, "dur": 0.451, + "args": { + "External id": 293057,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989992.577, "dur": 0.315, + "args": { + "External id": 293058,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989994.369, "dur": 0.206, + "args": { + "External id": 293059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989995.391, "dur": 0.205, + "args": { + "External id": 293060,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989998.220, "dur": 0.296, + "args": { + "External id": 293061,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368989999.253, "dur": 0.337, + "args": { + "External id": 293062,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990000.533, "dur": 0.356, + "args": { + "External id": 293063,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990001.614, "dur": 0.200, + "args": { + "External id": 293064,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990002.766, "dur": 0.232, + "args": { + "External id": 293065,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990003.992, "dur": 0.294, + "args": { + "External id": 293066,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990005.060, "dur": 0.205, + "args": { + "External id": 293067,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990005.962, "dur": 0.211, + "args": { + "External id": 293068,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990008.838, "dur": 0.221, + "args": { + "External id": 293069,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990009.879, "dur": 0.213, + "args": { + "External id": 293070,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990010.830, "dur": 0.228, + "args": { + "External id": 293071,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990011.748, "dur": 0.204, + "args": { + "External id": 293072,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990012.658, "dur": 0.203, + "args": { + "External id": 293073,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990013.561, "dur": 0.352, + "args": { + "External id": 293074,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990015.158, "dur": 0.307, + "args": { + "External id": 293075,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990017.813, "dur": 0.203, + "args": { + "External id": 293076,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990020.376, "dur": 0.200, + "args": { + "External id": 293077,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990021.298, "dur": 0.204, + "args": { + "External id": 293078,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990022.196, "dur": 0.313, + "args": { + "External id": 293079,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990023.198, "dur": 0.217, + "args": { + "External id": 293080,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990024.116, "dur": 0.337, + "args": { + "External id": 293081,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990025.277, "dur": 0.317, + "args": { + "External id": 293082,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990026.284, "dur": 0.196, + "args": { + "External id": 293083,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990027.191, "dur": 0.200, + "args": { + "External id": 293084,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990030.101, "dur": 0.201, + "args": { + "External id": 293085,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990031.069, "dur": 0.204, + "args": { + "External id": 293086,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990031.996, "dur": 0.342, + "args": { + "External id": 293087,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990033.056, "dur": 0.316, + "args": { + "External id": 293088,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990034.193, "dur": 0.324, + "args": { + "External id": 293089,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990035.244, "dur": 0.343, + "args": { + "External id": 293090,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990036.395, "dur": 0.192, + "args": { + "External id": 293091,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990037.291, "dur": 0.306, + "args": { + "External id": 293092,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990040.106, "dur": 0.417, + "args": { + "External id": 293093,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990041.209, "dur": 0.378, + "args": { + "External id": 293094,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990042.818, "dur": 0.413, + "args": { + "External id": 293095,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990043.941, "dur": 0.399, + "args": { + "External id": 293096,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990045.342, "dur": 0.343, + "args": { + "External id": 293097,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990046.402, "dur": 0.213, + "args": { + "External id": 293098,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990047.389, "dur": 0.197, + "args": { + "External id": 293099,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990048.271, "dur": 0.495, + "args": { + "External id": 293100,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990051.522, "dur": 0.507, + "args": { + "External id": 293101,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990052.718, "dur": 0.356, + "args": { + "External id": 293102,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990053.801, "dur": 0.195, + "args": { + "External id": 293103,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990054.696, "dur": 0.205, + "args": { + "External id": 293104,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990055.614, "dur": 0.215, + "args": { + "External id": 293105,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990056.509, "dur": 0.227, + "args": { + "External id": 293106,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990057.438, "dur": 0.219, + "args": { + "External id": 293107,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990058.335, "dur": 0.234, + "args": { + "External id": 293108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990060.907, "dur": 0.197, + "args": { + "External id": 293109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990061.833, "dur": 0.202, + "args": { + "External id": 293110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990062.775, "dur": 0.218, + "args": { + "External id": 293111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990063.694, "dur": 0.202, + "args": { + "External id": 293112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990064.702, "dur": 0.213, + "args": { + "External id": 293113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990065.693, "dur": 0.210, + "args": { + "External id": 293114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990067.188, "dur": 0.199, + "args": { + "External id": 293115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990068.185, "dur": 0.207, + "args": { + "External id": 293116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990070.783, "dur": 0.200, + "args": { + "External id": 293117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990071.691, "dur": 0.203, + "args": { + "External id": 293118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990072.603, "dur": 0.196, + "args": { + "External id": 293119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990073.491, "dur": 0.204, + "args": { + "External id": 293120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990074.478, "dur": 0.208, + "args": { + "External id": 293121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990075.479, "dur": 0.214, + "args": { + "External id": 293122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990076.381, "dur": 0.205, + "args": { + "External id": 293123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990077.297, "dur": 0.203, + "args": { + "External id": 293124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990080.267, "dur": 0.198, + "args": { + "External id": 293125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990081.151, "dur": 0.199, + "args": { + "External id": 293126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990082.052, "dur": 0.199, + "args": { + "External id": 293127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990082.931, "dur": 0.204, + "args": { + "External id": 293128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990083.853, "dur": 0.198, + "args": { + "External id": 293129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990084.844, "dur": 0.201, + "args": { + "External id": 293130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990085.848, "dur": 0.197, + "args": { + "External id": 293131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990086.831, "dur": 0.204, + "args": { + "External id": 293132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990089.954, "dur": 0.199, + "args": { + "External id": 293133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990090.943, "dur": 0.205, + "args": { + "External id": 293134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990091.952, "dur": 0.200, + "args": { + "External id": 293135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990092.851, "dur": 0.200, + "args": { + "External id": 293136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990093.880, "dur": 0.200, + "args": { + "External id": 293137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990094.873, "dur": 0.202, + "args": { + "External id": 293138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990096.197, "dur": 0.202, + "args": { + "External id": 293139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990097.110, "dur": 0.206, + "args": { + "External id": 293140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990100.021, "dur": 0.205, + "args": { + "External id": 293141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990100.968, "dur": 0.202, + "args": { + "External id": 293142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990101.950, "dur": 0.209, + "args": { + "External id": 293143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990102.839, "dur": 0.205, + "args": { + "External id": 293144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990103.752, "dur": 0.197, + "args": { + "External id": 293145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990104.674, "dur": 0.229, + "args": { + "External id": 293146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990105.601, "dur": 0.204, + "args": { + "External id": 293147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990106.497, "dur": 0.409, + "args": { + "External id": 293148,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990109.231, "dur": 0.206, + "args": { + "External id": 293149,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990110.129, "dur": 0.397, + "args": { + "External id": 293150,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990111.500, "dur": 0.398, + "args": { + "External id": 293151,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990112.587, "dur": 0.239, + "args": { + "External id": 293152,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990113.522, "dur": 0.397, + "args": { + "External id": 293153,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990114.643, "dur": 0.428, + "args": { + "External id": 293154,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990115.758, "dur": 0.200, + "args": { + "External id": 293155,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990116.638, "dur": 0.412, + "args": { + "External id": 293156,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990119.708, "dur": 0.177, + "args": { + "External id": 293157,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990120.579, "dur": 0.239, + "args": { + "External id": 293158,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990121.494, "dur": 0.198, + "args": { + "External id": 293159,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990122.446, "dur": 0.201, + "args": { + "External id": 293160,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990123.355, "dur": 0.212, + "args": { + "External id": 293161,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990124.506, "dur": 0.335, + "args": { + "External id": 293162,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990125.530, "dur": 0.328, + "args": { + "External id": 293163,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990126.759, "dur": 0.353, + "args": { + "External id": 293164,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990129.937, "dur": 0.203, + "args": { + "External id": 293165,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990130.824, "dur": 0.455, + "args": { + "External id": 293166,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990132.375, "dur": 0.214, + "args": { + "External id": 293167,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990133.399, "dur": 0.200, + "args": { + "External id": 293168,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990134.314, "dur": 0.258, + "args": { + "External id": 293169,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990135.325, "dur": 0.234, + "args": { + "External id": 293170,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990136.255, "dur": 0.236, + "args": { + "External id": 293171,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990137.292, "dur": 0.397, + "args": { + "External id": 293172,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990140.445, "dur": 0.218, + "args": { + "External id": 293173,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990141.352, "dur": 0.211, + "args": { + "External id": 293174,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990142.281, "dur": 0.238, + "args": { + "External id": 293175,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990143.241, "dur": 0.347, + "args": { + "External id": 293176,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990144.332, "dur": 0.353, + "args": { + "External id": 293177,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990145.417, "dur": 0.353, + "args": { + "External id": 293178,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990146.781, "dur": 0.347, + "args": { + "External id": 293179,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990147.946, "dur": 0.339, + "args": { + "External id": 293180,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990150.923, "dur": 0.205, + "args": { + "External id": 293181,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990151.834, "dur": 0.212, + "args": { + "External id": 293182,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990152.738, "dur": 0.205, + "args": { + "External id": 293183,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990153.625, "dur": 0.210, + "args": { + "External id": 293184,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990154.536, "dur": 0.206, + "args": { + "External id": 293185,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990155.450, "dur": 0.205, + "args": { + "External id": 293186,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990156.381, "dur": 0.201, + "args": { + "External id": 293187,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990157.299, "dur": 0.205, + "args": { + "External id": 293188,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990161.530, "dur": 0.223, + "args": { + "External id": 293189,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990162.520, "dur": 0.233, + "args": { + "External id": 293190,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990163.450, "dur": 0.205, + "args": { + "External id": 293191,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990164.402, "dur": 0.206, + "args": { + "External id": 293192,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990179.307, "dur": 0.560, + "args": { + "External id": 293193,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990181.201, "dur": 0.211, + "args": { + "External id": 293194,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990182.242, "dur": 0.218, + "args": { + "External id": 293195,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990183.221, "dur": 0.221, + "args": { + "External id": 293196,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990185.751, "dur": 0.209, + "args": { + "External id": 293197,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990186.779, "dur": 0.216, + "args": { + "External id": 293198,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990188.252, "dur": 0.218, + "args": { + "External id": 293199,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990189.307, "dur": 0.243, + "args": { + "External id": 293200,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990190.250, "dur": 0.217, + "args": { + "External id": 293201,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990191.187, "dur": 0.209, + "args": { + "External id": 293202,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990192.326, "dur": 0.220, + "args": { + "External id": 293203,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990193.274, "dur": 0.213, + "args": { + "External id": 293204,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990196.327, "dur": 0.226, + "args": { + "External id": 293205,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990197.260, "dur": 0.215, + "args": { + "External id": 293206,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990198.201, "dur": 0.244, + "args": { + "External id": 293207,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990199.195, "dur": 0.441, + "args": { + "External id": 293208,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990200.393, "dur": 0.395, + "args": { + "External id": 293209,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990201.509, "dur": 0.221, + "args": { + "External id": 293210,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990202.473, "dur": 0.214, + "args": { + "External id": 293211,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990203.539, "dur": 0.207, + "args": { + "External id": 293212,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990206.872, "dur": 0.218, + "args": { + "External id": 293213,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990207.859, "dur": 0.211, + "args": { + "External id": 293214,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990208.798, "dur": 0.210, + "args": { + "External id": 293215,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990209.875, "dur": 0.264, + "args": { + "External id": 293216,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990211.154, "dur": 0.391, + "args": { + "External id": 293217,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990212.338, "dur": 0.340, + "args": { + "External id": 293218,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990213.400, "dur": 0.332, + "args": { + "External id": 293219,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990214.512, "dur": 0.335, + "args": { + "External id": 293220,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990217.400, "dur": 0.216, + "args": { + "External id": 293221,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990218.505, "dur": 0.333, + "args": { + "External id": 293222,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990219.760, "dur": 0.245, + "args": { + "External id": 293223,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990220.716, "dur": 0.325, + "args": { + "External id": 293224,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990221.843, "dur": 0.348, + "args": { + "External id": 293225,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990222.931, "dur": 0.221, + "args": { + "External id": 293226,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990223.874, "dur": 0.212, + "args": { + "External id": 293227,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990224.815, "dur": 0.223, + "args": { + "External id": 293228,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990227.727, "dur": 0.213, + "args": { + "External id": 293229,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990228.657, "dur": 0.218, + "args": { + "External id": 293230,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990229.611, "dur": 0.227, + "args": { + "External id": 293231,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990230.547, "dur": 0.207, + "args": { + "External id": 293232,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990231.482, "dur": 0.211, + "args": { + "External id": 293233,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990232.478, "dur": 0.239, + "args": { + "External id": 293234,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990233.888, "dur": 0.239, + "args": { + "External id": 293235,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990234.896, "dur": 0.228, + "args": { + "External id": 293236,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990237.660, "dur": 0.213, + "args": { + "External id": 293237,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990238.714, "dur": 0.213, + "args": { + "External id": 293238,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990239.666, "dur": 0.450, + "args": { + "External id": 293239,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990240.816, "dur": 0.390, + "args": { + "External id": 293240,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990241.949, "dur": 0.237, + "args": { + "External id": 293241,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990242.902, "dur": 0.463, + "args": { + "External id": 293242,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990244.087, "dur": 0.205, + "args": { + "External id": 293243,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990245.056, "dur": 0.234, + "args": { + "External id": 293244,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990247.541, "dur": 0.404, + "args": { + "External id": 293245,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990248.894, "dur": 0.208, + "args": { + "External id": 293246,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990249.820, "dur": 0.278, + "args": { + "External id": 293247,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990250.827, "dur": 0.328, + "args": { + "External id": 293248,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990251.869, "dur": 0.225, + "args": { + "External id": 293249,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990252.836, "dur": 0.222, + "args": { + "External id": 293250,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990253.808, "dur": 0.211, + "args": { + "External id": 293251,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990254.769, "dur": 0.224, + "args": { + "External id": 293252,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990257.616, "dur": 0.241, + "args": { + "External id": 293253,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990258.654, "dur": 0.199, + "args": { + "External id": 293254,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990259.582, "dur": 0.207, + "args": { + "External id": 293255,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990260.566, "dur": 0.214, + "args": { + "External id": 293256,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368990261.493, "dur": 0.219, + "args": { + "External id": 293257,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070547, "tid": 2070547, + "ts": 5333368990317.711, "dur": 1645.672, + "args": { + "External id": 293258,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070547, "tid": 2070547, + "ts": 5333368990796.300, "dur": 1090.153, + "args": { + "External id": 293259,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990809.036, "dur": 7.538, + "args": { + "External id": 293260,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990813.144, "dur": 2.896, + "args": { + "External id": 293261,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990817.112, "dur": 2.985, + "args": { + "External id": 293262,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990818.157, "dur": 1.810, + "args": { + "External id": 293263,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990820.633, "dur": 3.142, + "args": { + "External id": 293264,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990822.596, "dur": 1.106, + "args": { + "External id": 293265,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990826.231, "dur": 1.788, + "args": { + "External id": 293266,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990827.045, "dur": 0.901, + "args": { + "External id": 293267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990828.355, "dur": 4.032, + "args": { + "External id": 293268,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990831.807, "dur": 0.513, + "args": { + "External id": 293269,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990832.739, "dur": 1.330, + "args": { + "External id": 293270,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990833.371, "dur": 0.633, + "args": { + "External id": 293271,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990834.298, "dur": 1.439, + "args": { + "External id": 293272,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990834.998, "dur": 0.673, + "args": { + "External id": 293273,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990836.107, "dur": 2.910, + "args": { + "External id": 293274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990838.360, "dur": 0.589, + "args": { + "External id": 293275,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990839.354, "dur": 1.158, + "args": { + "External id": 293276,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990839.960, "dur": 0.484, + "args": { + "External id": 293277,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990840.737, "dur": 2.872, + "args": { + "External id": 293278,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990841.302, "dur": 2.235, + "args": { + "External id": 293279,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990843.838, "dur": 2.185, + "args": { + "External id": 293280,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990845.485, "dur": 0.472, + "args": { + "External id": 293281,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990848.197, "dur": 1.326, + "args": { + "External id": 293282,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990848.579, "dur": 0.875, + "args": { + "External id": 293283,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990849.768, "dur": 4.087, + "args": { + "External id": 293284,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990853.071, "dur": 0.718, + "args": { + "External id": 293285,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990854.105, "dur": 1.487, + "args": { + "External id": 293286,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990854.803, "dur": 0.721, + "args": { + "External id": 293287,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990855.975, "dur": 1.485, + "args": { + "External id": 293288,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990856.628, "dur": 0.762, + "args": { + "External id": 293289,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990857.700, "dur": 2.959, + "args": { + "External id": 293290,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990859.976, "dur": 0.617, + "args": { + "External id": 293291,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990860.884, "dur": 1.252, + "args": { + "External id": 293292,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990861.524, "dur": 0.546, + "args": { + "External id": 293293,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990862.371, "dur": 2.582, + "args": { + "External id": 293294,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990862.908, "dur": 1.974, + "args": { + "External id": 293295,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990865.283, "dur": 2.599, + "args": { + "External id": 293296,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990866.890, "dur": 0.926, + "args": { + "External id": 293297,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990869.982, "dur": 1.351, + "args": { + "External id": 293298,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990870.354, "dur": 0.914, + "args": { + "External id": 293299,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990871.583, "dur": 4.252, + "args": { + "External id": 293300,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990875.311, "dur": 0.455, + "args": { + "External id": 293301,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990876.088, "dur": 1.335, + "args": { + "External id": 293302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990876.654, "dur": 0.700, + "args": { + "External id": 293303,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990877.646, "dur": 1.469, + "args": { + "External id": 293304,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990878.235, "dur": 0.813, + "args": { + "External id": 293305,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990879.468, "dur": 3.096, + "args": { + "External id": 293306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990881.721, "dur": 0.776, + "args": { + "External id": 293307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990882.884, "dur": 1.146, + "args": { + "External id": 293308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990883.361, "dur": 0.604, + "args": { + "External id": 293309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990884.345, "dur": 3.094, + "args": { + "External id": 293310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990885.229, "dur": 2.138, + "args": { + "External id": 293311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990887.707, "dur": 2.853, + "args": { + "External id": 293312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990889.908, "dur": 0.567, + "args": { + "External id": 293313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990892.388, "dur": 1.395, + "args": { + "External id": 293314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990893.040, "dur": 0.676, + "args": { + "External id": 293315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990894.206, "dur": 4.142, + "args": { + "External id": 293316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990897.800, "dur": 0.481, + "args": { + "External id": 293317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990898.741, "dur": 1.544, + "args": { + "External id": 293318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990899.536, "dur": 0.681, + "args": { + "External id": 293319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990900.638, "dur": 1.885, + "args": { + "External id": 293320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990901.238, "dur": 0.975, + "args": { + "External id": 293321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990902.743, "dur": 2.910, + "args": { + "External id": 293322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990904.764, "dur": 0.823, + "args": { + "External id": 293323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990905.900, "dur": 2.262, + "args": { + "External id": 293324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990907.400, "dur": 0.697, + "args": { + "External id": 293325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990908.409, "dur": 3.153, + "args": { + "External id": 293326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990909.301, "dur": 2.092, + "args": { + "External id": 293327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990911.912, "dur": 1.847, + "args": { + "External id": 293328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990913.078, "dur": 0.617, + "args": { + "External id": 293329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990915.652, "dur": 2.265, + "args": { + "External id": 293330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990916.863, "dur": 0.889, + "args": { + "External id": 293331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990918.164, "dur": 3.728, + "args": { + "External id": 293332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990921.272, "dur": 0.547, + "args": { + "External id": 293333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990922.299, "dur": 1.583, + "args": { + "External id": 293334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990923.194, "dur": 0.438, + "args": { + "External id": 293335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990924.129, "dur": 2.105, + "args": { + "External id": 293336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990925.222, "dur": 0.850, + "args": { + "External id": 293337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990926.485, "dur": 3.241, + "args": { + "External id": 293338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990928.932, "dur": 0.729, + "args": { + "External id": 293339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990929.999, "dur": 2.020, + "args": { + "External id": 293340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990931.214, "dur": 0.738, + "args": { + "External id": 293341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990932.290, "dur": 2.901, + "args": { + "External id": 293342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990932.899, "dur": 2.131, + "args": { + "External id": 293343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990935.444, "dur": 2.020, + "args": { + "External id": 293344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990936.825, "dur": 0.484, + "args": { + "External id": 293345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990939.633, "dur": 1.788, + "args": { + "External id": 293346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990940.712, "dur": 0.643, + "args": { + "External id": 293347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990941.797, "dur": 3.523, + "args": { + "External id": 293348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990944.791, "dur": 0.464, + "args": { + "External id": 293349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990945.693, "dur": 2.210, + "args": { + "External id": 293350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990946.928, "dur": 0.747, + "args": { + "External id": 293351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990948.255, "dur": 2.383, + "args": { + "External id": 293352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990949.464, "dur": 1.110, + "args": { + "External id": 293353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990950.904, "dur": 3.390, + "args": { + "External id": 293354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990953.490, "dur": 0.741, + "args": { + "External id": 293355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990954.677, "dur": 2.577, + "args": { + "External id": 293356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990956.343, "dur": 0.738, + "args": { + "External id": 293357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990957.509, "dur": 3.088, + "args": { + "External id": 293358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990958.564, "dur": 1.959, + "args": { + "External id": 293359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990960.892, "dur": 1.878, + "args": { + "External id": 293360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990962.005, "dur": 0.697, + "args": { + "External id": 293361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990964.445, "dur": 2.036, + "args": { + "External id": 293362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990965.661, "dur": 0.755, + "args": { + "External id": 293363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990966.735, "dur": 4.026, + "args": { + "External id": 293364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990969.971, "dur": 0.727, + "args": { + "External id": 293365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990971.031, "dur": 1.758, + "args": { + "External id": 293366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990972.242, "dur": 0.482, + "args": { + "External id": 293367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990973.049, "dur": 2.054, + "args": { + "External id": 293368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990974.217, "dur": 0.824, + "args": { + "External id": 293369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990975.460, "dur": 3.317, + "args": { + "External id": 293370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990977.994, "dur": 0.609, + "args": { + "External id": 293371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990979.128, "dur": 1.980, + "args": { + "External id": 293372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990980.408, "dur": 0.635, + "args": { + "External id": 293373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990981.365, "dur": 2.841, + "args": { + "External id": 293374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368990982.136, "dur": 1.997, + "args": { + "External id": 293375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368990984.461, "dur": 25.908, + "args": { + "External id": 293376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991008.312, "dur": 1.975, + "args": { + "External id": 298497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991012.649, "dur": 3.283, + "args": { + "External id": 298498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991013.592, "dur": 2.140, + "args": { + "External id": 298499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991016.219, "dur": 1.898, + "args": { + "External id": 298500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991017.656, "dur": 0.385, + "args": { + "External id": 298501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991018.365, "dur": 1.368, + "args": { + "External id": 298502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991019.232, "dur": 0.421, + "args": { + "External id": 298503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991019.988, "dur": 3.628, + "args": { + "External id": 298504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991023.133, "dur": 0.412, + "args": { + "External id": 298505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991023.890, "dur": 1.518, + "args": { + "External id": 298506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991024.572, "dur": 0.772, + "args": { + "External id": 298507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991025.689, "dur": 6.596, + "args": { + "External id": 298508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991030.293, "dur": 1.919, + "args": { + "External id": 298509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991032.563, "dur": 1.426, + "args": { + "External id": 298510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991033.209, "dur": 0.522, + "args": { + "External id": 298511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991034.451, "dur": 1.991, + "args": { + "External id": 298512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991035.723, "dur": 0.641, + "args": { + "External id": 298513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991038.443, "dur": 3.282, + "args": { + "External id": 298514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991041.124, "dur": 0.536, + "args": { + "External id": 298515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991042.040, "dur": 1.780, + "args": { + "External id": 298516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991043.009, "dur": 0.548, + "args": { + "External id": 298517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991044.070, "dur": 1.354, + "args": { + "External id": 298518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991044.720, "dur": 0.629, + "args": { + "External id": 298519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991045.697, "dur": 3.041, + "args": { + "External id": 298520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991048.025, "dur": 0.639, + "args": { + "External id": 298521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991048.989, "dur": 1.709, + "args": { + "External id": 298522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991050.117, "dur": 0.516, + "args": { + "External id": 298523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991050.982, "dur": 3.621, + "args": { + "External id": 298524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991052.292, "dur": 2.031, + "args": { + "External id": 298525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991054.890, "dur": 1.603, + "args": { + "External id": 298526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991055.808, "dur": 0.622, + "args": { + "External id": 298527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991056.740, "dur": 1.915, + "args": { + "External id": 298528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991057.973, "dur": 0.610, + "args": { + "External id": 298529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991061.061, "dur": 3.688, + "args": { + "External id": 298530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991064.056, "dur": 0.626, + "args": { + "External id": 298531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991065.050, "dur": 2.368, + "args": { + "External id": 298532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991066.680, "dur": 0.673, + "args": { + "External id": 298533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991067.700, "dur": 1.589, + "args": { + "External id": 298534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991068.513, "dur": 0.702, + "args": { + "External id": 298535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991069.562, "dur": 3.320, + "args": { + "External id": 298536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991072.138, "dur": 0.678, + "args": { + "External id": 298537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991073.145, "dur": 1.480, + "args": { + "External id": 298538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991074.014, "dur": 0.546, + "args": { + "External id": 298539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991074.889, "dur": 3.182, + "args": { + "External id": 298540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991076.044, "dur": 1.947, + "args": { + "External id": 298541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991078.337, "dur": 1.202, + "args": { + "External id": 298542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991078.991, "dur": 0.482, + "args": { + "External id": 298543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991079.975, "dur": 2.300, + "args": { + "External id": 298544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991081.308, "dur": 0.672, + "args": { + "External id": 298545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991084.441, "dur": 3.262, + "args": { + "External id": 298546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991086.991, "dur": 0.646, + "args": { + "External id": 298547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991087.981, "dur": 2.366, + "args": { + "External id": 298548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991089.189, "dur": 0.839, + "args": { + "External id": 298549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991090.622, "dur": 1.690, + "args": { + "External id": 298550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991091.394, "dur": 0.844, + "args": { + "External id": 298551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991092.560, "dur": 3.732, + "args": { + "External id": 298552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991095.624, "dur": 0.600, + "args": { + "External id": 298553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991096.562, "dur": 1.577, + "args": { + "External id": 298554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991097.280, "dur": 0.794, + "args": { + "External id": 298555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991098.435, "dur": 3.134, + "args": { + "External id": 298556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991099.578, "dur": 1.915, + "args": { + "External id": 298557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991101.823, "dur": 1.572, + "args": { + "External id": 298558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991102.549, "dur": 0.781, + "args": { + "External id": 298559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991103.644, "dur": 2.331, + "args": { + "External id": 298560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991105.227, "dur": 0.675, + "args": { + "External id": 298561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991108.060, "dur": 2.993, + "args": { + "External id": 298562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991110.441, "dur": 0.544, + "args": { + "External id": 298563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991111.370, "dur": 1.966, + "args": { + "External id": 298564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991112.559, "dur": 0.511, + "args": { + "External id": 298565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991113.599, "dur": 1.792, + "args": { + "External id": 298566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991114.512, "dur": 0.810, + "args": { + "External id": 298567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991115.642, "dur": 3.417, + "args": { + "External id": 298568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991118.387, "dur": 0.603, + "args": { + "External id": 298569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991119.323, "dur": 2.149, + "args": { + "External id": 298570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991120.606, "dur": 0.802, + "args": { + "External id": 298571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991121.760, "dur": 3.540, + "args": { + "External id": 298572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991123.291, "dur": 1.937, + "args": { + "External id": 298573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991125.583, "dur": 1.303, + "args": { + "External id": 298574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991126.293, "dur": 0.524, + "args": { + "External id": 298575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991127.133, "dur": 1.958, + "args": { + "External id": 298576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991128.129, "dur": 0.891, + "args": { + "External id": 298577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991131.141, "dur": 3.041, + "args": { + "External id": 298578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991133.498, "dur": 0.611, + "args": { + "External id": 298579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991134.457, "dur": 1.934, + "args": { + "External id": 298580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991135.922, "dur": 0.406, + "args": { + "External id": 298581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991136.672, "dur": 1.570, + "args": { + "External id": 298582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991137.498, "dur": 0.678, + "args": { + "External id": 298583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991138.491, "dur": 3.963, + "args": { + "External id": 298584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991141.675, "dur": 0.604, + "args": { + "External id": 298585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991142.743, "dur": 1.523, + "args": { + "External id": 298586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991143.708, "dur": 0.494, + "args": { + "External id": 298587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991144.548, "dur": 3.340, + "args": { + "External id": 298588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991145.594, "dur": 2.218, + "args": { + "External id": 298589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991149.700, "dur": 1.442, + "args": { + "External id": 298590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991150.615, "dur": 0.458, + "args": { + "External id": 298591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991151.550, "dur": 3.613, + "args": { + "External id": 298592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991152.982, "dur": 1.993, + "args": { + "External id": 298593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991157.023, "dur": 1.704, + "args": { + "External id": 298594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991158.010, "dur": 0.652, + "args": { + "External id": 298595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991158.982, "dur": 1.980, + "args": { + "External id": 298596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991160.089, "dur": 0.805, + "args": { + "External id": 298597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991161.210, "dur": 15.473, + "args": { + "External id": 298598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991164.328, "dur": 11.747, + "args": { + "External id": 298599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991177.960, "dur": 1.979, + "args": { + "External id": 298600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991179.277, "dur": 0.597, + "args": { + "External id": 298601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991180.337, "dur": 1.368, + "args": { + "External id": 298602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991181.032, "dur": 0.603, + "args": { + "External id": 298603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991181.994, "dur": 3.380, + "args": { + "External id": 298604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991184.622, "dur": 0.684, + "args": { + "External id": 298605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991185.875, "dur": 1.846, + "args": { + "External id": 298606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991186.794, "dur": 0.858, + "args": { + "External id": 298607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991188.115, "dur": 3.355, + "args": { + "External id": 298608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991189.399, "dur": 1.806, + "args": { + "External id": 298609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991193.423, "dur": 1.351, + "args": { + "External id": 298610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991194.117, "dur": 0.590, + "args": { + "External id": 298611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991195.041, "dur": 1.556, + "args": { + "External id": 298612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991195.873, "dur": 0.657, + "args": { + "External id": 298613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991196.861, "dur": 3.087, + "args": { + "External id": 298614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991199.403, "dur": 0.481, + "args": { + "External id": 298615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991200.228, "dur": 1.782, + "args": { + "External id": 298616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991201.409, "dur": 0.534, + "args": { + "External id": 298617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991202.501, "dur": 1.558, + "args": { + "External id": 298618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991203.258, "dur": 0.731, + "args": { + "External id": 298619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991204.470, "dur": 2.991, + "args": { + "External id": 298620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991206.926, "dur": 0.468, + "args": { + "External id": 298621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991207.864, "dur": 1.535, + "args": { + "External id": 298622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991208.712, "dur": 0.618, + "args": { + "External id": 298623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991209.666, "dur": 3.109, + "args": { + "External id": 298624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991211.030, "dur": 1.672, + "args": { + "External id": 298625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991214.834, "dur": 1.265, + "args": { + "External id": 298626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991215.474, "dur": 0.557, + "args": { + "External id": 298627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991216.393, "dur": 1.593, + "args": { + "External id": 298628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991217.247, "dur": 0.673, + "args": { + "External id": 298629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991218.252, "dur": 3.530, + "args": { + "External id": 298630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991220.862, "dur": 0.857, + "args": { + "External id": 298631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991222.081, "dur": 1.722, + "args": { + "External id": 298632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991223.254, "dur": 0.485, + "args": { + "External id": 298633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991224.352, "dur": 1.772, + "args": { + "External id": 298634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991225.298, "dur": 0.760, + "args": { + "External id": 298635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991226.398, "dur": 3.794, + "args": { + "External id": 298636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991229.281, "dur": 0.846, + "args": { + "External id": 298637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991230.447, "dur": 1.380, + "args": { + "External id": 298638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991231.218, "dur": 0.545, + "args": { + "External id": 298639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991232.229, "dur": 3.035, + "args": { + "External id": 298640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991233.305, "dur": 1.887, + "args": { + "External id": 298641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991237.171, "dur": 1.425, + "args": { + "External id": 298642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991237.931, "dur": 0.598, + "args": { + "External id": 298643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991238.863, "dur": 1.596, + "args": { + "External id": 298644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991239.750, "dur": 0.640, + "args": { + "External id": 298645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991240.918, "dur": 3.784, + "args": { + "External id": 298646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991243.905, "dur": 0.729, + "args": { + "External id": 298647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991245.025, "dur": 2.187, + "args": { + "External id": 298648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991246.535, "dur": 0.615, + "args": { + "External id": 298649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991247.607, "dur": 1.699, + "args": { + "External id": 298650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991248.645, "dur": 0.595, + "args": { + "External id": 298651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991249.577, "dur": 3.060, + "args": { + "External id": 298652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991251.997, "dur": 0.575, + "args": { + "External id": 298653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991252.909, "dur": 1.413, + "args": { + "External id": 298654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991253.795, "dur": 0.463, + "args": { + "External id": 298655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991254.575, "dur": 3.524, + "args": { + "External id": 298656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991255.845, "dur": 1.992, + "args": { + "External id": 298657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991260.487, "dur": 1.572, + "args": { + "External id": 298658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991261.396, "dur": 0.595, + "args": { + "External id": 298659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991262.311, "dur": 1.718, + "args": { + "External id": 298660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991263.203, "dur": 0.760, + "args": { + "External id": 298661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991264.297, "dur": 3.524, + "args": { + "External id": 298662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991266.919, "dur": 0.833, + "args": { + "External id": 298663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991268.174, "dur": 2.085, + "args": { + "External id": 298664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991269.519, "dur": 0.672, + "args": { + "External id": 298665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991270.523, "dur": 1.388, + "args": { + "External id": 298666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991271.182, "dur": 0.658, + "args": { + "External id": 298667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991272.176, "dur": 3.411, + "args": { + "External id": 298668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991275.078, "dur": 0.443, + "args": { + "External id": 298669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991275.855, "dur": 1.625, + "args": { + "External id": 298670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991276.881, "dur": 0.526, + "args": { + "External id": 298671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991277.758, "dur": 2.640, + "args": { + "External id": 298672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991278.652, "dur": 1.676, + "args": { + "External id": 298673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991282.141, "dur": 1.377, + "args": { + "External id": 298674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991282.983, "dur": 0.469, + "args": { + "External id": 298675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991283.875, "dur": 1.840, + "args": { + "External id": 298676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991284.975, "dur": 0.673, + "args": { + "External id": 298677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991285.984, "dur": 2.869, + "args": { + "External id": 298678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991288.075, "dur": 0.713, + "args": { + "External id": 298679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991289.131, "dur": 2.017, + "args": { + "External id": 298680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991290.460, "dur": 0.621, + "args": { + "External id": 298681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991291.426, "dur": 2.001, + "args": { + "External id": 298682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991292.769, "dur": 0.589, + "args": { + "External id": 298683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991293.813, "dur": 3.178, + "args": { + "External id": 298684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991296.337, "dur": 0.590, + "args": { + "External id": 298685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991297.318, "dur": 1.562, + "args": { + "External id": 298686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991298.074, "dur": 0.740, + "args": { + "External id": 298687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991299.150, "dur": 3.192, + "args": { + "External id": 298688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991300.244, "dur": 1.954, + "args": { + "External id": 298689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991304.267, "dur": 1.512, + "args": { + "External id": 298690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991305.233, "dur": 0.478, + "args": { + "External id": 298691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991306.069, "dur": 2.327, + "args": { + "External id": 298692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991307.464, "dur": 0.863, + "args": { + "External id": 298693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991308.678, "dur": 3.274, + "args": { + "External id": 298694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991311.138, "dur": 0.747, + "args": { + "External id": 298695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991312.242, "dur": 1.511, + "args": { + "External id": 298696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991313.125, "dur": 0.562, + "args": { + "External id": 298697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991314.196, "dur": 1.894, + "args": { + "External id": 298698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991315.118, "dur": 0.906, + "args": { + "External id": 298699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991316.345, "dur": 3.730, + "args": { + "External id": 298700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991319.420, "dur": 0.590, + "args": { + "External id": 298701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991320.329, "dur": 1.402, + "args": { + "External id": 298702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991321.206, "dur": 0.460, + "args": { + "External id": 298703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991322.025, "dur": 3.365, + "args": { + "External id": 298704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991323.316, "dur": 1.986, + "args": { + "External id": 298705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991327.262, "dur": 1.360, + "args": { + "External id": 298706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991327.955, "dur": 0.596, + "args": { + "External id": 298707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991328.897, "dur": 1.899, + "args": { + "External id": 298708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991330.176, "dur": 0.555, + "args": { + "External id": 298709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991331.073, "dur": 3.465, + "args": { + "External id": 298710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991333.556, "dur": 0.916, + "args": { + "External id": 298711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991334.816, "dur": 1.738, + "args": { + "External id": 298712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991336.036, "dur": 0.453, + "args": { + "External id": 298713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991336.844, "dur": 1.774, + "args": { + "External id": 298714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991337.679, "dur": 0.869, + "args": { + "External id": 298715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991338.922, "dur": 3.166, + "args": { + "External id": 298716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991341.324, "dur": 0.700, + "args": { + "External id": 298717,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991342.564, "dur": 1.253, + "args": { + "External id": 298718,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991343.196, "dur": 0.557, + "args": { + "External id": 298719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991344.291, "dur": 3.355, + "args": { + "External id": 298720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991345.130, "dur": 2.445, + "args": { + "External id": 298721,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991349.760, "dur": 1.848, + "args": { + "External id": 298722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991350.649, "dur": 0.891, + "args": { + "External id": 298723,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991351.910, "dur": 1.847, + "args": { + "External id": 298724,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991353.124, "dur": 0.568, + "args": { + "External id": 298725,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991354.246, "dur": 2.763, + "args": { + "External id": 298726,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991356.486, "dur": 0.460, + "args": { + "External id": 298727,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991357.293, "dur": 1.638, + "args": { + "External id": 298728,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991358.424, "dur": 0.439, + "args": { + "External id": 298729,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991359.214, "dur": 1.590, + "args": { + "External id": 298730,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991360.277, "dur": 0.458, + "args": { + "External id": 298731,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991361.055, "dur": 3.162, + "args": { + "External id": 298732,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991363.589, "dur": 0.555, + "args": { + "External id": 298733,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991364.469, "dur": 1.308, + "args": { + "External id": 298734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991365.176, "dur": 0.540, + "args": { + "External id": 298735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991366.064, "dur": 3.078, + "args": { + "External id": 298736,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991367.199, "dur": 1.875, + "args": { + "External id": 298737,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991371.028, "dur": 1.512, + "args": { + "External id": 298738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991371.864, "dur": 0.610, + "args": { + "External id": 298739,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991373.143, "dur": 2.323, + "args": { + "External id": 298740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991374.721, "dur": 0.680, + "args": { + "External id": 298741,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991375.773, "dur": 2.774, + "args": { + "External id": 298742,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991378.016, "dur": 0.461, + "args": { + "External id": 298743,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991379.035, "dur": 2.026, + "args": { + "External id": 298744,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991380.255, "dur": 0.738, + "args": { + "External id": 298745,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991381.310, "dur": 1.676, + "args": { + "External id": 298746,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991382.106, "dur": 0.815, + "args": { + "External id": 298747,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991383.342, "dur": 3.333, + "args": { + "External id": 298748,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991386.034, "dur": 0.575, + "args": { + "External id": 298749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991386.960, "dur": 1.473, + "args": { + "External id": 298750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991387.742, "dur": 0.622, + "args": { + "External id": 298751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991388.701, "dur": 3.545, + "args": { + "External id": 298752,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991390.119, "dur": 2.049, + "args": { + "External id": 298753,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991394.035, "dur": 1.189, + "args": { + "External id": 298754,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991394.721, "dur": 0.434, + "args": { + "External id": 298755,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991395.475, "dur": 2.132, + "args": { + "External id": 298756,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991396.768, "dur": 0.773, + "args": { + "External id": 298757,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991398.073, "dur": 2.890, + "args": { + "External id": 298758,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991400.388, "dur": 0.511, + "args": { + "External id": 298759,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991401.253, "dur": 2.072, + "args": { + "External id": 298760,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991402.648, "dur": 0.612, + "args": { + "External id": 298761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991403.609, "dur": 1.409, + "args": { + "External id": 298762,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991404.333, "dur": 0.615, + "args": { + "External id": 298763,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991405.284, "dur": 3.532, + "args": { + "External id": 298764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991407.973, "dur": 0.774, + "args": { + "External id": 298765,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991409.070, "dur": 1.532, + "args": { + "External id": 298766,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991410.043, "dur": 0.493, + "args": { + "External id": 298767,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991410.893, "dur": 3.185, + "args": { + "External id": 298768,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991412.344, "dur": 1.665, + "args": { + "External id": 298769,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991416.368, "dur": 1.443, + "args": { + "External id": 298770,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991417.224, "dur": 0.521, + "args": { + "External id": 298771,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991418.115, "dur": 2.131, + "args": { + "External id": 298772,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991419.488, "dur": 0.695, + "args": { + "External id": 298773,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991420.597, "dur": 2.998, + "args": { + "External id": 298774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991422.984, "dur": 0.548, + "args": { + "External id": 298775,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991423.917, "dur": 2.071, + "args": { + "External id": 298776,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991425.099, "dur": 0.826, + "args": { + "External id": 298777,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991426.390, "dur": 1.614, + "args": { + "External id": 298778,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991427.268, "dur": 0.669, + "args": { + "External id": 298779,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991428.261, "dur": 3.352, + "args": { + "External id": 298780,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991430.832, "dur": 0.713, + "args": { + "External id": 298781,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991431.978, "dur": 1.445, + "args": { + "External id": 298782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991432.775, "dur": 0.577, + "args": { + "External id": 298783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991433.771, "dur": 3.731, + "args": { + "External id": 298784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991435.232, "dur": 2.196, + "args": { + "External id": 298785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991439.564, "dur": 1.475, + "args": { + "External id": 298786,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991440.457, "dur": 0.515, + "args": { + "External id": 298787,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991441.510, "dur": 2.339, + "args": { + "External id": 298788,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991442.930, "dur": 0.854, + "args": { + "External id": 298789,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991444.104, "dur": 3.254, + "args": { + "External id": 298790,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991446.732, "dur": 0.563, + "args": { + "External id": 298791,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991447.695, "dur": 2.049, + "args": { + "External id": 298792,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991448.782, "dur": 0.898, + "args": { + "External id": 298793,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991450.201, "dur": 1.588, + "args": { + "External id": 298794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991450.809, "dur": 0.912, + "args": { + "External id": 298795,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991452.057, "dur": 3.191, + "args": { + "External id": 298796,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991454.415, "dur": 0.768, + "args": { + "External id": 298797,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991455.497, "dur": 1.680, + "args": { + "External id": 298798,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991456.365, "dur": 0.746, + "args": { + "External id": 298799,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991457.450, "dur": 3.076, + "args": { + "External id": 298800,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991458.366, "dur": 1.871, + "args": { + "External id": 298801,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991462.396, "dur": 1.687, + "args": { + "External id": 298802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991463.243, "dur": 0.773, + "args": { + "External id": 298803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991464.366, "dur": 2.126, + "args": { + "External id": 298804,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991465.845, "dur": 0.581, + "args": { + "External id": 298805,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991466.916, "dur": 3.420, + "args": { + "External id": 298806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991469.524, "dur": 0.745, + "args": { + "External id": 298807,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991470.703, "dur": 1.445, + "args": { + "External id": 298808,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991471.658, "dur": 0.426, + "args": { + "External id": 298809,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991472.401, "dur": 1.722, + "args": { + "External id": 298810,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991473.402, "dur": 0.656, + "args": { + "External id": 298811,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991474.420, "dur": 3.503, + "args": { + "External id": 298812,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991477.269, "dur": 0.590, + "args": { + "External id": 298813,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991478.205, "dur": 1.700, + "args": { + "External id": 298814,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991479.022, "dur": 0.815, + "args": { + "External id": 298815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991480.181, "dur": 2.800, + "args": { + "External id": 298816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991481.219, "dur": 1.689, + "args": { + "External id": 298817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991484.961, "dur": 1.631, + "args": { + "External id": 298818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991486.059, "dur": 0.468, + "args": { + "External id": 298819,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991486.849, "dur": 2.463, + "args": { + "External id": 298820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991488.138, "dur": 1.106, + "args": { + "External id": 298821,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991489.601, "dur": 2.819, + "args": { + "External id": 298822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991491.689, "dur": 0.665, + "args": { + "External id": 298823,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991492.751, "dur": 1.834, + "args": { + "External id": 298824,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991493.881, "dur": 0.638, + "args": { + "External id": 298825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991494.959, "dur": 1.375, + "args": { + "External id": 298826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991495.694, "dur": 0.571, + "args": { + "External id": 298827,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991496.616, "dur": 3.534, + "args": { + "External id": 298828,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991499.088, "dur": 0.998, + "args": { + "External id": 298829,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991500.405, "dur": 1.227, + "args": { + "External id": 298830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991501.165, "dur": 0.400, + "args": { + "External id": 298831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991501.962, "dur": 3.389, + "args": { + "External id": 298832,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991503.074, "dur": 2.206, + "args": { + "External id": 298833,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991507.393, "dur": 1.219, + "args": { + "External id": 298834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991508.072, "dur": 0.472, + "args": { + "External id": 298835,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991509.067, "dur": 2.169, + "args": { + "External id": 298836,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991510.216, "dur": 0.951, + "args": { + "External id": 298837,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991511.491, "dur": 3.192, + "args": { + "External id": 298838,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991514.120, "dur": 0.495, + "args": { + "External id": 298839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991514.959, "dur": 1.915, + "args": { + "External id": 298840,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991516.100, "dur": 0.710, + "args": { + "External id": 298841,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991517.163, "dur": 1.664, + "args": { + "External id": 298842,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991517.945, "dur": 0.817, + "args": { + "External id": 298843,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991519.139, "dur": 3.616, + "args": { + "External id": 298844,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991521.685, "dur": 1.007, + "args": { + "External id": 298845,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991524.247, "dur": 1.397, + "args": { + "External id": 298846,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991525.017, "dur": 0.562, + "args": { + "External id": 298847,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991525.936, "dur": 3.491, + "args": { + "External id": 298848,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991527.025, "dur": 2.330, + "args": { + "External id": 298849,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991531.296, "dur": 1.474, + "args": { + "External id": 298850,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991532.092, "dur": 0.610, + "args": { + "External id": 298851,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991533.170, "dur": 1.571, + "args": { + "External id": 298852,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991534.082, "dur": 0.592, + "args": { + "External id": 298853,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991535.071, "dur": 2.765, + "args": { + "External id": 298854,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991537.214, "dur": 0.557, + "args": { + "External id": 298855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991538.372, "dur": 1.639, + "args": { + "External id": 298856,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991539.463, "dur": 0.481, + "args": { + "External id": 298857,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991540.313, "dur": 1.543, + "args": { + "External id": 298858,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991541.123, "dur": 0.668, + "args": { + "External id": 298859,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991542.209, "dur": 3.303, + "args": { + "External id": 298860,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991544.874, "dur": 0.572, + "args": { + "External id": 298861,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991545.777, "dur": 1.616, + "args": { + "External id": 298862,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991546.673, "dur": 0.656, + "args": { + "External id": 298863,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991547.800, "dur": 2.924, + "args": { + "External id": 298864,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991548.792, "dur": 1.860, + "args": { + "External id": 298865,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991552.654, "dur": 1.352, + "args": { + "External id": 298866,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991553.351, "dur": 0.591, + "args": { + "External id": 298867,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991554.342, "dur": 2.247, + "args": { + "External id": 298868,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991555.725, "dur": 0.795, + "args": { + "External id": 298869,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991556.844, "dur": 2.933, + "args": { + "External id": 298870,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991559.075, "dur": 0.634, + "args": { + "External id": 298871,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991560.070, "dur": 2.352, + "args": { + "External id": 298872,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991561.438, "dur": 0.917, + "args": { + "External id": 298873,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991562.727, "dur": 1.650, + "args": { + "External id": 298874,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991563.544, "dur": 0.765, + "args": { + "External id": 298875,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991564.638, "dur": 4.084, + "args": { + "External id": 298876,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991568.163, "dur": 0.493, + "args": { + "External id": 298877,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991568.993, "dur": 1.625, + "args": { + "External id": 298878,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991569.958, "dur": 0.592, + "args": { + "External id": 298879,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991570.881, "dur": 3.201, + "args": { + "External id": 298880,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991572.149, "dur": 1.860, + "args": { + "External id": 298881,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991576.042, "dur": 1.402, + "args": { + "External id": 298882,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991576.858, "dur": 0.518, + "args": { + "External id": 298883,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991577.895, "dur": 2.287, + "args": { + "External id": 298884,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991579.393, "dur": 0.721, + "args": { + "External id": 298885,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991580.482, "dur": 3.195, + "args": { + "External id": 298886,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991582.864, "dur": 0.745, + "args": { + "External id": 298887,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991584.004, "dur": 1.681, + "args": { + "External id": 298888,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991585.135, "dur": 0.488, + "args": { + "External id": 298889,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991585.944, "dur": 1.543, + "args": { + "External id": 298890,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991586.714, "dur": 0.708, + "args": { + "External id": 298891,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991587.900, "dur": 3.332, + "args": { + "External id": 298892,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991590.369, "dur": 0.795, + "args": { + "External id": 298893,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991591.515, "dur": 1.607, + "args": { + "External id": 298894,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991592.316, "dur": 0.739, + "args": { + "External id": 298895,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991593.387, "dur": 3.537, + "args": { + "External id": 298896,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991595.015, "dur": 1.837, + "args": { + "External id": 298897,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991599.208, "dur": 1.583, + "args": { + "External id": 298898,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991600.094, "dur": 0.631, + "args": { + "External id": 298899,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991601.298, "dur": 2.045, + "args": { + "External id": 298900,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991602.415, "dur": 0.860, + "args": { + "External id": 298901,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991603.609, "dur": 3.204, + "args": { + "External id": 298902,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991606.158, "dur": 0.589, + "args": { + "External id": 298903,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991607.133, "dur": 2.183, + "args": { + "External id": 298904,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991608.370, "dur": 0.882, + "args": { + "External id": 298905,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991609.665, "dur": 1.384, + "args": { + "External id": 298906,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991610.448, "dur": 0.532, + "args": { + "External id": 298907,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991611.316, "dur": 3.259, + "args": { + "External id": 298908,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991614.000, "dur": 0.507, + "args": { + "External id": 298909,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991614.827, "dur": 2.101, + "args": { + "External id": 298910,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991616.111, "dur": 0.748, + "args": { + "External id": 298911,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991617.179, "dur": 30.328, + "args": { + "External id": 298912,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991618.284, "dur": 28.507, + "args": { + "External id": 298913,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991650.619, "dur": 2.220, + "args": { + "External id": 298914,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991652.044, "dur": 0.725, + "args": { + "External id": 298915,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991653.140, "dur": 1.897, + "args": { + "External id": 298916,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991654.282, "dur": 0.689, + "args": { + "External id": 298917,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991655.300, "dur": 2.912, + "args": { + "External id": 298918,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991657.743, "dur": 0.402, + "args": { + "External id": 298919,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991658.488, "dur": 1.513, + "args": { + "External id": 298920,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991659.205, "dur": 0.730, + "args": { + "External id": 298921,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991660.251, "dur": 1.573, + "args": { + "External id": 298922,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991661.037, "dur": 0.722, + "args": { + "External id": 298923,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991662.189, "dur": 3.469, + "args": { + "External id": 298924,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991665.031, "dur": 0.560, + "args": { + "External id": 298925,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991666.213, "dur": 1.160, + "args": { + "External id": 298926,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991666.882, "dur": 0.425, + "args": { + "External id": 298927,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991667.649, "dur": 2.998, + "args": { + "External id": 298928,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991668.626, "dur": 1.950, + "args": { + "External id": 298929,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991672.223, "dur": 1.599, + "args": { + "External id": 298930,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991673.075, "dur": 0.678, + "args": { + "External id": 298931,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991674.103, "dur": 1.585, + "args": { + "External id": 298932,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991675.006, "dur": 0.620, + "args": { + "External id": 298933,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991676.155, "dur": 2.924, + "args": { + "External id": 298934,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991678.444, "dur": 0.567, + "args": { + "External id": 298935,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991679.382, "dur": 1.586, + "args": { + "External id": 298936,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991680.367, "dur": 0.532, + "args": { + "External id": 298937,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991681.337, "dur": 1.668, + "args": { + "External id": 298938,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991682.059, "dur": 0.883, + "args": { + "External id": 298939,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991683.286, "dur": 3.325, + "args": { + "External id": 298940,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991685.744, "dur": 0.795, + "args": { + "External id": 298941,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991686.961, "dur": 1.720, + "args": { + "External id": 298942,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991687.858, "dur": 0.755, + "args": { + "External id": 298943,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991688.953, "dur": 3.437, + "args": { + "External id": 298944,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991689.970, "dur": 2.086, + "args": { + "External id": 298945,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991694.270, "dur": 1.753, + "args": { + "External id": 298946,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991695.458, "dur": 0.501, + "args": { + "External id": 298947,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991696.482, "dur": 1.481, + "args": { + "External id": 298948,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991697.290, "dur": 0.609, + "args": { + "External id": 298949,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991698.209, "dur": 3.545, + "args": { + "External id": 298950,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991701.087, "dur": 0.599, + "args": { + "External id": 298951,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991702.039, "dur": 2.149, + "args": { + "External id": 298952,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991703.059, "dur": 1.064, + "args": { + "External id": 298953,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991704.567, "dur": 1.407, + "args": { + "External id": 298954,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991705.173, "dur": 0.734, + "args": { + "External id": 298955,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991706.219, "dur": 3.164, + "args": { + "External id": 298956,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991708.680, "dur": 0.637, + "args": { + "External id": 298957,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991709.627, "dur": 1.603, + "args": { + "External id": 298958,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991710.587, "dur": 0.574, + "args": { + "External id": 298959,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991711.646, "dur": 3.008, + "args": { + "External id": 298960,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991712.602, "dur": 1.978, + "args": { + "External id": 298961,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070547, "tid": 2070547, + "ts": 5333368991716.990, "dur": 1.572, + "args": { + "External id": 298962,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368991717.839, "dur": 0.659, + "args": { + "External id": 298963,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070547, "tid": 2070547, + "ts": 5333368991737.849, "dur": 136.836, + "args": { + "External id": 298964,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070547, "tid": 2070547, + "ts": 5333368991976.632, "dur": 131.405, + "args": { + "External id": 298965,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070547, "tid": 2070547, + "ts": 5333368992039.158, "dur": 49.193, + "args": { + "External id": 298966,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070547, "tid": 2070547, + "ts": 5333368992053.715, "dur": 1.082, + "args": { + "External id": 298967,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2070547, "tid": 2070547, + "ts": 5333368992524.430, "dur": 982.855, + "args": { + "External id": 298968,"Sequence number": 1209237, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070547, "tid": 2070547, + "ts": 5333368992578.321, "dur": 96.930, + "args": { + "External id": 298969,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368992583.279, "dur": 1.371, + "args": { + "External id": 298970,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368992587.755, "dur": 0.695, + "args": { + "External id": 298971,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2070547, "tid": 2070547, + "ts": 5333368992703.777, "dur": 483.456, + "args": { + "External id": 298972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070547, "tid": 2070547, + "ts": 5333368992708.201, "dur": 51.297, + "args": { + "External id": 298973,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070547, "tid": 2070547, + "ts": 5333368992711.400, "dur": 12.464, + "args": { + "External id": 298974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333368992717.806, "dur": 5.373, + "args": { + "External id": 298975,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070547, "tid": 2070547, + "ts": 5333368992725.500, "dur": 33.412, + "args": { + "External id": 298976,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2070547, "tid": 2070547, + "ts": 5333368992769.705, "dur": 413.493, + "args": { + "External id": 298977,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333368992802.982, "dur": 356.528, + "args": { + "External id": 298978,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 0, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "0", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9390, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2070547, "tid": 2070547, + "ts": 5333368992823.047, "dur": 330.854, + "args": { + "External id": 298979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070547, "tid": 2070547, + "ts": 5333368993254.816, "dur": 213.213, + "args": { + "External id": 298980,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2070547, "tid": 2070547, + "ts": 5333368993352.609, "dur": 34.901, + "args": { + "External id": 298981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070547, "tid": 2070547, + "ts": 5333368993373.079, "dur": 5.225, + "args": { + "External id": 298982,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "0", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9394, "In msg nelems": 0, "Rank": 0, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070547, "tid": 2070547, + "ts": 5333368993418.728, "dur": 43.077, + "args": { + "External id": 298983,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368993421.019, "dur": 1.282, + "args": { + "External id": 298984,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368993423.649, "dur": 0.431, + "args": { + "External id": 298985,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2070547, "tid": 2070547, + "ts": 5333368993525.552, "dur": 20.411, + "args": { + "External id": 298986,"Sequence number": 1209238, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070547, "tid": 2070547, + "ts": 5333368993534.475, "dur": 8.304, + "args": { + "External id": 298987,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070547, "tid": 2070547, + "ts": 5333368993536.994, "dur": 5.621, + "args": { + "External id": 298988,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070547, "tid": 2070547, + "ts": 5333368994087.328, "dur": 44.362, + "args": { + "External id": 298989,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2070547, "tid": 2070547, + "ts": 5333368994141.232, "dur": 48.298, + "args": { + "External id": 298990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2070547, "tid": 2070547, + "ts": 5333368994201.964, "dur": 24.000, + "args": { + "External id": 298991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2070547, "tid": 2070547, + "ts": 5333368994240.442, "dur": 25.189, + "args": { + "External id": 298992,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368994244.104, "dur": 0.600, + "args": { + "External id": 298993,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070547, "tid": 2070547, + "ts": 5333368994283.120, "dur": 0.546, + "args": { + "External id": 298994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070547, "tid": 2070547, + "ts": 5333368994410.534, "dur": 878.224, + "args": { + "External id": 298995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070547, "tid": 2070547, + "ts": 5333368994957.540, "dur": 298.581, + "args": { + "External id": 298996,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2070547, "tid": 2070547, + "ts": 5333368995335.124, "dur": 32.062, + "args": { + "External id": 298997,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070547, "tid": 2070547, + "ts": 5333368995339.251, "dur": 27.378, + "args": { + "External id": 298998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070547, "tid": 2070547, + "ts": 5333368995372.091, "dur": 339.841, + "args": { + "External id": 298999,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070547, "tid": 2070547, + "ts": 5333368995374.012, "dur": 337.508, + "args": { + "External id": 299000,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070547, "tid": 2070547, + "ts": 5333368995376.212, "dur": 334.171, + "args": { + "External id": 299001,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2070547, "tid": 2070547, + "ts": 5333368995720.927, "dur": 67.768, + "args": { + "External id": 299002,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333368995724.182, "dur": 39.880, + "args": { + "External id": 299003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070547, "tid": 2070547, + "ts": 5333368995732.938, "dur": 3.713, + "args": { + "External id": 299004,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070547, "tid": 2070547, + "ts": 5333368995738.535, "dur": 25.215, + "args": { + "External id": 299005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070547, "tid": 2070547, + "ts": 5333368995744.154, "dur": 2.971, + "args": { + "External id": 299006,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070547, "tid": 2070547, + "ts": 5333368995765.884, "dur": 21.800, + "args": { + "External id": 299007,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070547, "tid": 2070547, + "ts": 5333368995791.710, "dur": 74.942, + "args": { + "External id": 299008,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070547, "tid": 2070547, + "ts": 5333368995823.986, "dur": 42.507, + "args": { + "External id": 299009,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070547, "tid": 2070547, + "ts": 5333368995826.097, "dur": 40.102, + "args": { + "External id": 299010,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2070547, "tid": 2070547, + "ts": 5333368995904.946, "dur": 6010.463, + "args": { + "External id": 299011,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2070547, "tid": 2070547, + "ts": 5333368995938.073, "dur": 5955.223, + "args": { + "External id": 299012,"Record function id": 0, "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2070547, "tid": 2070547, + "ts": 5333368997226.945, "dur": 252.749, + "args": { + "External id": 299013,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997247.407, "dur": 1.397, + "args": { + "External id": 299014,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997249.868, "dur": 0.102, + "args": { + "External id": 299015,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997250.424, "dur": 0.210, + "args": { + "External id": 299016,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997251.173, "dur": 0.050, + "args": { + "External id": 299017,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997251.621, "dur": 0.184, + "args": { + "External id": 299018,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997252.189, "dur": 0.200, + "args": { + "External id": 299019,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997252.855, "dur": 0.219, + "args": { + "External id": 299020,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997253.414, "dur": 0.064, + "args": { + "External id": 299021,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997253.869, "dur": 0.204, + "args": { + "External id": 299022,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997254.421, "dur": 0.064, + "args": { + "External id": 299023,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997255.005, "dur": 0.068, + "args": { + "External id": 299024,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997255.428, "dur": 0.067, + "args": { + "External id": 299025,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997255.892, "dur": 0.058, + "args": { + "External id": 299026,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997256.349, "dur": 0.058, + "args": { + "External id": 299027,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997256.787, "dur": 0.053, + "args": { + "External id": 299028,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997257.254, "dur": 0.057, + "args": { + "External id": 299029,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997257.707, "dur": 0.058, + "args": { + "External id": 299030,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997258.112, "dur": 0.058, + "args": { + "External id": 299031,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997258.536, "dur": 0.060, + "args": { + "External id": 299032,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997258.995, "dur": 0.060, + "args": { + "External id": 299033,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997259.469, "dur": 0.059, + "args": { + "External id": 299034,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997259.940, "dur": 0.058, + "args": { + "External id": 299035,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997260.367, "dur": 0.226, + "args": { + "External id": 299036,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997260.959, "dur": 0.056, + "args": { + "External id": 299037,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997261.418, "dur": 0.052, + "args": { + "External id": 299038,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997261.830, "dur": 0.055, + "args": { + "External id": 299039,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997262.301, "dur": 0.059, + "args": { + "External id": 299040,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997262.762, "dur": 0.056, + "args": { + "External id": 299041,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997263.162, "dur": 0.051, + "args": { + "External id": 299042,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997263.534, "dur": 0.055, + "args": { + "External id": 299043,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997263.981, "dur": 0.056, + "args": { + "External id": 299044,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997264.409, "dur": 0.059, + "args": { + "External id": 299045,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997264.865, "dur": 0.052, + "args": { + "External id": 299046,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997265.237, "dur": 0.089, + "args": { + "External id": 299047,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997265.914, "dur": 0.067, + "args": { + "External id": 299048,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997266.319, "dur": 0.066, + "args": { + "External id": 299049,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997266.779, "dur": 0.065, + "args": { + "External id": 299050,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997267.481, "dur": 0.066, + "args": { + "External id": 299051,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997267.856, "dur": 0.065, + "args": { + "External id": 299052,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997268.335, "dur": 0.070, + "args": { + "External id": 299053,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997268.689, "dur": 0.065, + "args": { + "External id": 299054,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997269.187, "dur": 0.054, + "args": { + "External id": 299055,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997269.631, "dur": 0.068, + "args": { + "External id": 299056,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997269.966, "dur": 0.069, + "args": { + "External id": 299057,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997270.472, "dur": 0.068, + "args": { + "External id": 299058,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997270.921, "dur": 0.204, + "args": { + "External id": 299059,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997271.485, "dur": 0.198, + "args": { + "External id": 299060,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997272.058, "dur": 0.233, + "args": { + "External id": 299061,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997272.663, "dur": 0.198, + "args": { + "External id": 299062,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997273.340, "dur": 0.051, + "args": { + "External id": 299063,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997273.771, "dur": 0.067, + "args": { + "External id": 299064,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997274.349, "dur": 0.069, + "args": { + "External id": 299065,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997274.771, "dur": 0.069, + "args": { + "External id": 299066,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997275.231, "dur": 0.067, + "args": { + "External id": 299067,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997275.698, "dur": 0.058, + "args": { + "External id": 299068,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997276.103, "dur": 0.062, + "args": { + "External id": 299069,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997276.535, "dur": 0.065, + "args": { + "External id": 299070,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997276.885, "dur": 0.066, + "args": { + "External id": 299071,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997277.384, "dur": 0.071, + "args": { + "External id": 299072,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997277.828, "dur": 0.067, + "args": { + "External id": 299073,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997278.288, "dur": 0.261, + "args": { + "External id": 299074,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997278.918, "dur": 0.238, + "args": { + "External id": 299075,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997279.578, "dur": 0.222, + "args": { + "External id": 299076,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997280.065, "dur": 0.222, + "args": { + "External id": 299077,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997280.781, "dur": 0.219, + "args": { + "External id": 299078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997281.388, "dur": 0.063, + "args": { + "External id": 299079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997281.966, "dur": 0.058, + "args": { + "External id": 299080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997282.356, "dur": 0.324, + "args": { + "External id": 299081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997283.022, "dur": 0.147, + "args": { + "External id": 299082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997283.643, "dur": 0.198, + "args": { + "External id": 299083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997284.542, "dur": 0.188, + "args": { + "External id": 299084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997285.111, "dur": 0.062, + "args": { + "External id": 299085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997285.522, "dur": 0.064, + "args": { + "External id": 299086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997285.999, "dur": 0.061, + "args": { + "External id": 299087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997286.453, "dur": 0.064, + "args": { + "External id": 299088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997287.007, "dur": 0.064, + "args": { + "External id": 299089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997287.410, "dur": 0.065, + "args": { + "External id": 299090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997287.840, "dur": 0.066, + "args": { + "External id": 299091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997288.325, "dur": 0.064, + "args": { + "External id": 299092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997288.723, "dur": 0.062, + "args": { + "External id": 299093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997289.146, "dur": 0.067, + "args": { + "External id": 299094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997289.602, "dur": 0.070, + "args": { + "External id": 299095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997290.065, "dur": 0.070, + "args": { + "External id": 299096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997290.479, "dur": 0.070, + "args": { + "External id": 299097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997290.977, "dur": 0.058, + "args": { + "External id": 299098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997291.393, "dur": 0.066, + "args": { + "External id": 299099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997291.834, "dur": 0.064, + "args": { + "External id": 299100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997292.268, "dur": 0.070, + "args": { + "External id": 299101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997292.930, "dur": 0.238, + "args": { + "External id": 299102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997293.516, "dur": 0.066, + "args": { + "External id": 299103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997293.958, "dur": 0.069, + "args": { + "External id": 299104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997294.432, "dur": 0.181, + "args": { + "External id": 299105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997295.005, "dur": 0.207, + "args": { + "External id": 299106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997295.599, "dur": 0.106, + "args": { + "External id": 299107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997296.079, "dur": 0.106, + "args": { + "External id": 299108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997296.653, "dur": 0.226, + "args": { + "External id": 299109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997297.145, "dur": 0.068, + "args": { + "External id": 299110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997297.638, "dur": 0.051, + "args": { + "External id": 299111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997297.991, "dur": 0.055, + "args": { + "External id": 299112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997298.477, "dur": 0.052, + "args": { + "External id": 299113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997298.938, "dur": 0.051, + "args": { + "External id": 299114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997299.371, "dur": 0.062, + "args": { + "External id": 299115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997299.963, "dur": 0.065, + "args": { + "External id": 299116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997300.399, "dur": 0.072, + "args": { + "External id": 299117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997300.739, "dur": 0.063, + "args": { + "External id": 299118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997301.132, "dur": 0.067, + "args": { + "External id": 299119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997301.571, "dur": 0.066, + "args": { + "External id": 299120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997302.026, "dur": 0.054, + "args": { + "External id": 299121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997302.469, "dur": 0.056, + "args": { + "External id": 299122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997302.958, "dur": 0.057, + "args": { + "External id": 299123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997303.374, "dur": 0.059, + "args": { + "External id": 299124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997303.915, "dur": 0.055, + "args": { + "External id": 299125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997304.279, "dur": 0.059, + "args": { + "External id": 299126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997304.701, "dur": 0.054, + "args": { + "External id": 299127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997305.146, "dur": 0.083, + "args": { + "External id": 299128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997305.573, "dur": 0.058, + "args": { + "External id": 299129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997305.952, "dur": 0.066, + "args": { + "External id": 299130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997306.349, "dur": 0.067, + "args": { + "External id": 299131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997306.782, "dur": 0.065, + "args": { + "External id": 299132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997307.219, "dur": 0.059, + "args": { + "External id": 299133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997307.655, "dur": 0.063, + "args": { + "External id": 299134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997308.091, "dur": 0.067, + "args": { + "External id": 299135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997308.538, "dur": 0.061, + "args": { + "External id": 299136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997308.999, "dur": 0.064, + "args": { + "External id": 299137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997309.349, "dur": 0.064, + "args": { + "External id": 299138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997309.753, "dur": 0.067, + "args": { + "External id": 299139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997310.201, "dur": 0.050, + "args": { + "External id": 299140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997310.634, "dur": 0.064, + "args": { + "External id": 299141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997311.042, "dur": 0.063, + "args": { + "External id": 299142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997311.382, "dur": 0.063, + "args": { + "External id": 299143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997311.838, "dur": 0.067, + "args": { + "External id": 299144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997312.369, "dur": 0.066, + "args": { + "External id": 299145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997312.811, "dur": 0.262, + "args": { + "External id": 299146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997313.338, "dur": 0.311, + "args": { + "External id": 299147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997313.925, "dur": 0.188, + "args": { + "External id": 299148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997314.520, "dur": 0.218, + "args": { + "External id": 299149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997315.071, "dur": 0.188, + "args": { + "External id": 299150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997315.586, "dur": 0.068, + "args": { + "External id": 299151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997316.055, "dur": 0.065, + "args": { + "External id": 299152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997316.592, "dur": 0.073, + "args": { + "External id": 299153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997317.035, "dur": 0.180, + "args": { + "External id": 299154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997317.571, "dur": 0.237, + "args": { + "External id": 299155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997318.178, "dur": 0.095, + "args": { + "External id": 299156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997318.638, "dur": 0.068, + "args": { + "External id": 299157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997319.062, "dur": 0.066, + "args": { + "External id": 299158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997319.517, "dur": 0.054, + "args": { + "External id": 299159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997319.864, "dur": 0.069, + "args": { + "External id": 299160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997320.271, "dur": 0.064, + "args": { + "External id": 299161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997320.637, "dur": 0.067, + "args": { + "External id": 299162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997321.089, "dur": 0.066, + "args": { + "External id": 299163,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997321.491, "dur": 0.071, + "args": { + "External id": 299164,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997321.940, "dur": 0.069, + "args": { + "External id": 299165,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997322.282, "dur": 0.062, + "args": { + "External id": 299166,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997322.658, "dur": 0.070, + "args": { + "External id": 299167,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997323.059, "dur": 0.057, + "args": { + "External id": 299168,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997323.483, "dur": 0.068, + "args": { + "External id": 299169,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997323.852, "dur": 0.068, + "args": { + "External id": 299170,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997324.295, "dur": 0.066, + "args": { + "External id": 299171,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997324.717, "dur": 0.070, + "args": { + "External id": 299172,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997325.133, "dur": 0.066, + "args": { + "External id": 299173,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997325.529, "dur": 0.068, + "args": { + "External id": 299174,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997325.919, "dur": 0.071, + "args": { + "External id": 299175,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997326.379, "dur": 0.065, + "args": { + "External id": 299176,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997326.827, "dur": 0.064, + "args": { + "External id": 299177,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997327.189, "dur": 0.066, + "args": { + "External id": 299178,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997327.757, "dur": 0.066, + "args": { + "External id": 299179,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997328.237, "dur": 0.067, + "args": { + "External id": 299180,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997328.606, "dur": 0.065, + "args": { + "External id": 299181,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997329.012, "dur": 0.065, + "args": { + "External id": 299182,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997329.630, "dur": 0.053, + "args": { + "External id": 299183,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997329.956, "dur": 0.066, + "args": { + "External id": 299184,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997336.136, "dur": 0.081, + "args": { + "External id": 299185,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997337.761, "dur": 0.055, + "args": { + "External id": 299186,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997338.154, "dur": 0.225, + "args": { + "External id": 299187,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997338.743, "dur": 0.064, + "args": { + "External id": 299188,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997339.337, "dur": 0.225, + "args": { + "External id": 299189,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997339.936, "dur": 0.208, + "args": { + "External id": 299190,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997340.480, "dur": 0.069, + "args": { + "External id": 299191,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997340.922, "dur": 0.064, + "args": { + "External id": 299192,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997341.482, "dur": 0.066, + "args": { + "External id": 299193,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997341.946, "dur": 0.196, + "args": { + "External id": 299194,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997342.493, "dur": 0.068, + "args": { + "External id": 299195,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997342.833, "dur": 0.065, + "args": { + "External id": 299196,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997343.269, "dur": 0.067, + "args": { + "External id": 299197,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997343.666, "dur": 0.070, + "args": { + "External id": 299198,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997344.097, "dur": 0.067, + "args": { + "External id": 299199,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997344.543, "dur": 0.066, + "args": { + "External id": 299200,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997344.980, "dur": 0.053, + "args": { + "External id": 299201,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997345.346, "dur": 0.065, + "args": { + "External id": 299202,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997345.816, "dur": 0.072, + "args": { + "External id": 299203,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997346.159, "dur": 0.067, + "args": { + "External id": 299204,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997346.536, "dur": 0.063, + "args": { + "External id": 299205,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997346.904, "dur": 0.060, + "args": { + "External id": 299206,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997347.302, "dur": 0.065, + "args": { + "External id": 299207,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997347.651, "dur": 0.063, + "args": { + "External id": 299208,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997348.046, "dur": 0.068, + "args": { + "External id": 299209,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997348.517, "dur": 0.063, + "args": { + "External id": 299210,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997348.970, "dur": 0.067, + "args": { + "External id": 299211,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997349.322, "dur": 0.066, + "args": { + "External id": 299212,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997349.835, "dur": 0.069, + "args": { + "External id": 299213,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997350.201, "dur": 0.065, + "args": { + "External id": 299214,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997350.641, "dur": 0.063, + "args": { + "External id": 299215,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997351.063, "dur": 0.056, + "args": { + "External id": 299216,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997351.462, "dur": 0.068, + "args": { + "External id": 299217,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997351.860, "dur": 0.069, + "args": { + "External id": 299218,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997352.306, "dur": 0.070, + "args": { + "External id": 299219,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997352.685, "dur": 0.069, + "args": { + "External id": 299220,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997353.166, "dur": 0.214, + "args": { + "External id": 299221,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997353.677, "dur": 0.243, + "args": { + "External id": 299222,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997354.193, "dur": 0.067, + "args": { + "External id": 299223,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997354.631, "dur": 0.199, + "args": { + "External id": 299224,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997355.230, "dur": 0.165, + "args": { + "External id": 299225,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997355.924, "dur": 0.045, + "args": { + "External id": 299226,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997356.338, "dur": 0.204, + "args": { + "External id": 299227,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997356.830, "dur": 0.187, + "args": { + "External id": 299228,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997357.372, "dur": 0.061, + "args": { + "External id": 299229,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997357.740, "dur": 0.059, + "args": { + "External id": 299230,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997358.147, "dur": 0.060, + "args": { + "External id": 299231,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997358.637, "dur": 0.048, + "args": { + "External id": 299232,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997358.945, "dur": 0.057, + "args": { + "External id": 299233,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997359.334, "dur": 0.061, + "args": { + "External id": 299234,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997359.791, "dur": 0.050, + "args": { + "External id": 299235,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997360.228, "dur": 0.060, + "args": { + "External id": 299236,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997360.653, "dur": 0.060, + "args": { + "External id": 299237,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997361.141, "dur": 0.056, + "args": { + "External id": 299238,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997361.570, "dur": 0.066, + "args": { + "External id": 299239,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997362.006, "dur": 0.069, + "args": { + "External id": 299240,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997362.344, "dur": 0.068, + "args": { + "External id": 299241,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997362.766, "dur": 0.062, + "args": { + "External id": 299242,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997363.148, "dur": 0.067, + "args": { + "External id": 299243,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997363.590, "dur": 0.071, + "args": { + "External id": 299244,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997364.081, "dur": 0.059, + "args": { + "External id": 299245,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997364.535, "dur": 0.054, + "args": { + "External id": 299246,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997364.941, "dur": 0.057, + "args": { + "External id": 299247,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997365.346, "dur": 0.055, + "args": { + "External id": 299248,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997365.677, "dur": 0.058, + "args": { + "External id": 299249,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997366.168, "dur": 0.246, + "args": { + "External id": 299250,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997366.753, "dur": 0.099, + "args": { + "External id": 299251,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997367.151, "dur": 0.062, + "args": { + "External id": 299252,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997367.524, "dur": 0.062, + "args": { + "External id": 299253,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997367.950, "dur": 0.059, + "args": { + "External id": 299254,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997368.359, "dur": 0.056, + "args": { + "External id": 299255,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997368.714, "dur": 0.060, + "args": { + "External id": 299256,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997369.079, "dur": 0.059, + "args": { + "External id": 299257,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997369.534, "dur": 0.056, + "args": { + "External id": 299258,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997369.919, "dur": 0.060, + "args": { + "External id": 299259,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997370.305, "dur": 0.076, + "args": { + "External id": 299260,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997370.739, "dur": 0.204, + "args": { + "External id": 299261,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997371.246, "dur": 0.066, + "args": { + "External id": 299262,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997371.721, "dur": 0.067, + "args": { + "External id": 299263,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997372.186, "dur": 0.064, + "args": { + "External id": 299264,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997372.625, "dur": 0.054, + "args": { + "External id": 299265,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997372.975, "dur": 0.066, + "args": { + "External id": 299266,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997373.441, "dur": 0.065, + "args": { + "External id": 299267,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997373.908, "dur": 0.068, + "args": { + "External id": 299268,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997374.599, "dur": 0.061, + "args": { + "External id": 299269,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997375.007, "dur": 0.068, + "args": { + "External id": 299270,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997375.382, "dur": 0.067, + "args": { + "External id": 299271,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997375.767, "dur": 0.066, + "args": { + "External id": 299272,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997376.215, "dur": 0.062, + "args": { + "External id": 299273,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997376.712, "dur": 0.065, + "args": { + "External id": 299274,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997377.121, "dur": 0.067, + "args": { + "External id": 299275,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997377.504, "dur": 0.070, + "args": { + "External id": 299276,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997377.924, "dur": 0.063, + "args": { + "External id": 299277,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997378.334, "dur": 0.063, + "args": { + "External id": 299278,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997378.746, "dur": 0.068, + "args": { + "External id": 299279,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997379.186, "dur": 0.087, + "args": { + "External id": 299280,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997379.573, "dur": 0.061, + "args": { + "External id": 299281,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997379.993, "dur": 0.061, + "args": { + "External id": 299282,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997380.440, "dur": 0.066, + "args": { + "External id": 299283,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997380.873, "dur": 0.067, + "args": { + "External id": 299284,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997381.206, "dur": 0.064, + "args": { + "External id": 299285,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997381.627, "dur": 0.066, + "args": { + "External id": 299286,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997381.959, "dur": 0.057, + "args": { + "External id": 299287,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997382.404, "dur": 0.055, + "args": { + "External id": 299288,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997382.782, "dur": 0.055, + "args": { + "External id": 299289,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997383.212, "dur": 0.068, + "args": { + "External id": 299290,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997383.660, "dur": 0.067, + "args": { + "External id": 299291,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997384.039, "dur": 0.062, + "args": { + "External id": 299292,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997384.666, "dur": 0.064, + "args": { + "External id": 299293,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997385.053, "dur": 0.225, + "args": { + "External id": 299294,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997385.636, "dur": 0.070, + "args": { + "External id": 299295,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997386.064, "dur": 0.089, + "args": { + "External id": 299296,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997386.451, "dur": 0.255, + "args": { + "External id": 299297,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997387.050, "dur": 0.184, + "args": { + "External id": 299298,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997391.561, "dur": 0.211, + "args": { + "External id": 299299,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997392.204, "dur": 0.197, + "args": { + "External id": 299300,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997392.797, "dur": 0.211, + "args": { + "External id": 299301,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997393.561, "dur": 0.057, + "args": { + "External id": 299302,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997393.997, "dur": 0.062, + "args": { + "External id": 299303,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997394.422, "dur": 0.066, + "args": { + "External id": 299304,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070547, "tid": 2070547, + "ts": 5333368997394.874, "dur": 0.061, + "args": { + "External id": 299305,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070547, "tid": 2070547, + "ts": 5333368997990.833, "dur": 3803.161, + "args": { + "External id": 299306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "3.6769426533212504e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070547, "tid": 2070547, + "ts": 5333369001217.650, "dur": 366.176, + "args": { + "External id": 299307,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "3.6769426533212504e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9719 + } + }, + { + "name": "process_name", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 0, + "args": { + "sort_index": 2070547 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 2070547, + "args": { + "name": "thread 2070547 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 2070547, + "args": { + "sort_index": 2070547 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 2107622, + "args": { + "name": "thread 2107622 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 2107622, + "args": { + "sort_index": 2107622 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 2107622, + "args": { + "name": "thread 2107622 (pt_autograd_0)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952597.599, "pid": 2070547, "tid": 2107622, + "args": { + "sort_index": 2107622 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 5333366952531.627, "dur": 2051067.444, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5333366952531.627, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 5333366952531.627 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 5333369045868.268 + } + ], + "traceName": "exp/top.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace/iteration_14848/rank0_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file